Merge tag 'drm/for-3.12-rc1' of git://anongit.freedesktop.org/tegra/linux into drm-next

drm/tegra: Changes for v3.12-rc1

Only a couple of small patches this time around. These are mostly fixes
for minor bugs that showed up, but there is also some preparatory work
that will come in handy for future patches.

* tag 'drm/for-3.12-rc1' of git://anongit.freedesktop.org/tegra/linux:
  drm/tegra: Parse device tree earlier
  gpu: host1x: Sort drivers by probe order
  gpu: host1x: Check for valid host1x pointer
  gpu: host1x: returning success instead of -ENOMEM
  gpu: host1x: fix an integer overflow check
  drm/tegra: hdmi: Make sure clock is enabled before dumping registers
diff --git a/.gitignore b/.gitignore
index 3b8b9b3..7e9932e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,7 @@
 *.bz2
 *.lzma
 *.xz
+*.lz4
 *.lzo
 *.patch
 *.gcno
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback
new file mode 100644
index 0000000..8bb43b6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback
@@ -0,0 +1,17 @@
+What:           /sys/module/xen_blkback/parameters/max_buffer_pages
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of free pages to keep in each block
+                backend buffer.
+
+What:           /sys/module/xen_blkback/parameters/max_persistent_grants
+Date:           March 2013
+KernelVersion:  3.11
+Contact:        Roger Pau Monné <roger.pau@citrix.com>
+Description:
+                Maximum number of grants to map persistently in
+                blkback. If the frontend tries to use more than
+                max_persistent_grants, the LRU kicks in and starts
+                removing 5% of max_persistent_grants every 100ms.
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
new file mode 100644
index 0000000..c0a6cb7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
@@ -0,0 +1,10 @@
+What:           /sys/module/xen_blkfront/parameters/max
+Date:           June 2013
+KernelVersion:  3.11
+Contact:        Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+                Maximum number of segments that the frontend will negotiate
+                with the backend for indirect descriptors. The default value
+                is 32 - higher value means more potential throughput but more
+                memory usage. The backend picks the minimum of the frontend
+                and its default backend value.
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
index cbfdf54..fe397f9 100644
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -84,7 +84,7 @@
 
      <sect1><title>Kernel utility functions</title>
 !Iinclude/linux/kernel.h
-!Ekernel/printk.c
+!Ekernel/printk/printk.c
 !Ekernel/panic.c
 !Ekernel/sys.c
 !Ekernel/rcupdate.c
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
index 7d1278e..ed1d6d2 100644
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@@ -156,13 +156,6 @@
             </para></listitem>
           </varlistentry>
           <varlistentry>
-            <term>DRIVER_USE_MTRR</term>
-            <listitem><para>
-              Driver uses MTRR interface for mapping memory, the DRM core will
-              manage MTRR resources. Deprecated.
-            </para></listitem>
-          </varlistentry>
-          <varlistentry>
             <term>DRIVER_PCI_DMA</term>
             <listitem><para>
               Driver is capable of PCI DMA, mapping of PCI DMA buffers to
@@ -195,28 +188,6 @@
             </para></listitem>
           </varlistentry>
           <varlistentry>
-            <term>DRIVER_IRQ_VBL</term>
-            <listitem><para>Unused. Deprecated.</para></listitem>
-          </varlistentry>
-          <varlistentry>
-            <term>DRIVER_DMA_QUEUE</term>
-            <listitem><para>
-              Should be set if the driver queues DMA requests and completes them
-              asynchronously.  Deprecated.
-            </para></listitem>
-          </varlistentry>
-          <varlistentry>
-            <term>DRIVER_FB_DMA</term>
-            <listitem><para>
-              Driver supports DMA to/from the framebuffer, mapping of frambuffer
-              DMA buffers to userspace will be supported. Deprecated.
-            </para></listitem>
-          </varlistentry>
-          <varlistentry>
-            <term>DRIVER_IRQ_VBL2</term>
-            <listitem><para>Unused. Deprecated.</para></listitem>
-          </varlistentry>
-          <varlistentry>
             <term>DRIVER_GEM</term>
             <listitem><para>
               Driver use the GEM memory manager.
@@ -234,6 +205,12 @@
               Driver implements DRM PRIME buffer sharing.
             </para></listitem>
           </varlistentry>
+          <varlistentry>
+            <term>DRIVER_RENDER</term>
+            <listitem><para>
+              Driver supports dedicated render nodes.
+            </para></listitem>
+          </varlistentry>
         </variablelist>
       </sect3>
       <sect3>
@@ -2212,6 +2189,18 @@
 !Iinclude/drm/drm_rect.h
 !Edrivers/gpu/drm/drm_rect.c
     </sect2>
+    <sect2>
+      <title>Flip-work Helper Reference</title>
+!Pinclude/drm/drm_flip_work.h flip utils
+!Iinclude/drm/drm_flip_work.h
+!Edrivers/gpu/drm/drm_flip_work.c
+    </sect2>
+    <sect2>
+      <title>VMA Offset Manager</title>
+!Pdrivers/gpu/drm/drm_vma_manager.c vma offset manager
+!Edrivers/gpu/drm/drm_vma_manager.c
+!Iinclude/drm/drm_vma_manager.h
+    </sect2>
   </sect1>
 
   <!-- Internals: kms properties -->
@@ -2422,18 +2411,18 @@
       </abstract>
       <para>
         The <methodname>firstopen</methodname> method is called by the DRM core
-	when an application opens a device that has no other opened file handle.
-	Similarly the <methodname>lastclose</methodname> method is called when
-	the last application holding a file handle opened on the device closes
-	it. Both methods are mostly used for UMS (User Mode Setting) drivers to
-	acquire and release device resources which should be done in the
-	<methodname>load</methodname> and <methodname>unload</methodname>
-	methods for KMS drivers.
+	for legacy UMS (User Mode Setting) drivers only when an application
+	opens a device that has no other opened file handle. UMS drivers can
+	implement it to acquire device resources. KMS drivers can't use the
+	method and must acquire resources in the <methodname>load</methodname>
+	method instead.
       </para>
       <para>
-        Note that the <methodname>lastclose</methodname> method is also called
-	at module unload time or, for hot-pluggable devices, when the device is
-	unplugged. The <methodname>firstopen</methodname> and
+	Similarly the <methodname>lastclose</methodname> method is called when
+	the last application holding a file handle opened on the device closes
+	it, for both UMS and KMS drivers. Additionally, the method is also
+	called at module unload time or, for hot-pluggable devices, when the
+	device is unplugged. The <methodname>firstopen</methodname> and
 	<methodname>lastclose</methodname> calls can thus be unbalanced.
       </para>
       <para>
@@ -2462,7 +2451,12 @@
       <para>
         The <methodname>lastclose</methodname> method should restore CRTC and
 	plane properties to default value, so that a subsequent open of the
-	device will not inherit state from the previous user.
+	device will not inherit state from the previous user. It can also be
+	used to execute delayed power switching state changes, e.g. in
+	conjunction with the vga-switcheroo infrastructure. Beyond that KMS
+	drivers should not do any further cleanup. Only legacy UMS drivers might
+	need to clean up device state so that the vga console or an independent
+	fbdev driver could take over.
       </para>
     </sect2>
     <sect2>
@@ -2498,7 +2492,6 @@
 	<programlisting>
 	.poll = drm_poll,
 	.read = drm_read,
-	.fasync = drm_fasync,
 	.llseek = no_llseek,
 	</programlisting>
       </para>
@@ -2657,6 +2650,69 @@
       info, since man pages should cover the rest.
     </para>
 
+  <!-- External: render nodes -->
+
+    <sect1>
+      <title>Render nodes</title>
+      <para>
+        DRM core provides multiple character-devices for user-space to use.
+        Depending on which device is opened, user-space can perform a different
+        set of operations (mainly ioctls). The primary node is always created
+        and called <term>card&lt;num&gt;</term>. Additionally, a currently
+        unused control node, called <term>controlD&lt;num&gt;</term> is also
+        created. The primary node provides all legacy operations and
+        historically was the only interface used by userspace. With KMS, the
+        control node was introduced. However, the planned KMS control interface
+        has never been written and so the control node stays unused to date.
+      </para>
+      <para>
+        With the increased use of offscreen renderers and GPGPU applications,
+        clients no longer require running compositors or graphics servers to
+        make use of a GPU. But the DRM API required unprivileged clients to
+        authenticate to a DRM-Master prior to getting GPU access. To avoid this
+        step and to grant clients GPU access without authenticating, render
+        nodes were introduced. Render nodes solely serve render clients, that
+        is, no modesetting or privileged ioctls can be issued on render nodes.
+        Only non-global rendering commands are allowed. If a driver supports
+        render nodes, it must advertise it via the <term>DRIVER_RENDER</term>
+        DRM driver capability. If not supported, the primary node must be used
+        for render clients together with the legacy drmAuth authentication
+        procedure.
+      </para>
+      <para>
+        If a driver advertises render node support, DRM core will create a
+        separate render node called <term>renderD&lt;num&gt;</term>. There will
+        be one render node per device. No ioctls except  PRIME-related ioctls
+        will be allowed on this node. Especially <term>GEM_OPEN</term> will be
+        explicitly prohibited. Render nodes are designed to avoid the
+        buffer-leaks, which occur if clients guess the flink names or mmap
+        offsets on the legacy interface. Additionally to this basic interface,
+        drivers must mark their driver-dependent render-only ioctls as
+        <term>DRM_RENDER_ALLOW</term> so render clients can use them. Driver
+        authors must be careful not to allow any privileged ioctls on render
+        nodes.
+      </para>
+      <para>
+        With render nodes, user-space can now control access to the render node
+        via basic file-system access-modes. A running graphics server which
+        authenticates clients on the privileged primary/legacy node is no longer
+        required. Instead, a client can open the render node and is immediately
+        granted GPU access. Communication between clients (or servers) is done
+        via PRIME. FLINK from render node to legacy node is not supported. New
+        clients must not use the insecure FLINK interface.
+      </para>
+      <para>
+        Besides dropping all modeset/global ioctls, render nodes also drop the
+        DRM-Master concept. There is no reason to associate render clients with
+        a DRM-Master as they are independent of any graphics server. Besides,
+        they must work without any running master, anyway.
+        Drivers must be able to run without a master object if they support
+        render nodes. If, on the other hand, a driver requires shared state
+        between clients which is visible to user-space and accessible beyond
+        open-file boundaries, they cannot support render nodes.
+      </para>
+    </sect1>
+
   <!-- External: vblank handling -->
 
     <sect1>
diff --git a/Documentation/DocBook/media_api.tmpl b/Documentation/DocBook/media_api.tmpl
index 6a8b715..9c92bb8 100644
--- a/Documentation/DocBook/media_api.tmpl
+++ b/Documentation/DocBook/media_api.tmpl
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
-	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+	"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
 <!ENTITY % media-entities SYSTEM "./media-entities.tmpl"> %media-entities;
 <!ENTITY media-indices SYSTEM "./media-indices.tmpl">
 
diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt
index c3365f2..32b6c31 100644
--- a/Documentation/bcache.txt
+++ b/Documentation/bcache.txt
@@ -46,29 +46,33 @@
 have to manually attach:
   make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
 
-To make bcache devices known to the kernel, echo them to /sys/fs/bcache/register:
+bcache-tools now ships udev rules, and bcache devices are known to the kernel
+immediately.  Without udev, you can manually register devices like this:
 
   echo /dev/sdb > /sys/fs/bcache/register
   echo /dev/sdc > /sys/fs/bcache/register
 
-To register your bcache devices automatically, you could add something like
-this to an init script:
+Registering the backing device makes the bcache device show up in /dev; you can
+now format it and use it as normal. But the first time using a new bcache
+device, it'll be running in passthrough mode until you attach it to a cache.
+See the section on attaching.
 
-  echo /dev/sd* > /sys/fs/bcache/register_quiet
+The devices show up as:
 
-It'll look for bcache superblocks and ignore everything that doesn't have one.
+  /dev/bcache<N>
 
-Registering the backing device makes the bcache show up in /dev; you can now
-format it and use it as normal. But the first time using a new bcache device,
-it'll be running in passthrough mode until you attach it to a cache. See the
-section on attaching.
+As well as (with udev):
 
-The devices show up at /dev/bcacheN, and can be controlled via sysfs from
-/sys/block/bcacheN/bcache:
+  /dev/bcache/by-uuid/<uuid>
+  /dev/bcache/by-label/<label>
+
+To get started:
 
   mkfs.ext4 /dev/bcache0
   mount /dev/bcache0 /mnt
 
+You can control bcache devices through sysfs at /sys/block/bcache<N>/bcache .
+
 Cache devices are managed as sets; multiple caches per set isn't supported yet
 but will allow for mirroring of metadata and dirty data in the future. Your new
 cache set shows up as /sys/fs/bcache/<UUID>
@@ -80,11 +84,11 @@
 device to a cache set is done thusly, with the UUID of the cache set in
 /sys/fs/bcache:
 
-  echo <UUID> > /sys/block/bcache0/bcache/attach
+  echo <CSET-UUID> > /sys/block/bcache0/bcache/attach
 
 This only has to be done once. The next time you reboot, just reregister all
 your bcache devices. If a backing device has data in a cache somewhere, the
-/dev/bcache# device won't be created until the cache shows up - particularly
+/dev/bcache<N> device won't be created until the cache shows up - particularly
 important if you have writeback caching turned on.
 
 If you're booting up and your cache device is gone and never coming back, you
@@ -191,6 +195,9 @@
 
 SYSFS - BACKING DEVICE:
 
+Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and
+(if attached) /sys/fs/bcache/<cset-uuid>/bdev*
+
 attach
   Echo the UUID of a cache set to this file to enable caching.
 
@@ -300,6 +307,8 @@
 
 SYSFS - CACHE SET:
 
+Available at /sys/fs/bcache/<cset-uuid>
+
 average_key_size
   Average data per key in the btree.
 
@@ -390,6 +399,8 @@
 
 SYSFS - CACHE DEVICE:
 
+Available at /sys/block/<cdev>/bcache
+
 block_size
   Minimum granularity of writes - should match hardware sector size.
 
diff --git a/Documentation/devicetree/bindings/clock/imx27-clock.txt b/Documentation/devicetree/bindings/clock/imx27-clock.txt
index ab1a56e..7a20703 100644
--- a/Documentation/devicetree/bindings/clock/imx27-clock.txt
+++ b/Documentation/devicetree/bindings/clock/imx27-clock.txt
@@ -98,6 +98,7 @@
 	fpm                  83
 	mpll_osc_sel         84
 	mpll_sel             85
+	spll_gate	     86
 
 Examples:
 
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt
index a1ee681..6113f92 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt
@@ -4,7 +4,7 @@
 Required properties :
 
  - reg             : Offset and length of the register set for the device
- - compatible      : Should be "marvell,mv64xxx-i2c"
+ - compatible      : Should be "marvell,mv64xxx-i2c" or "allwinner,sun4i-i2c"
  - interrupts      : The interrupt number
 
 Optional properties :
diff --git a/Documentation/devicetree/bindings/regulator/palmas-pmic.txt b/Documentation/devicetree/bindings/regulator/palmas-pmic.txt
index d5a3086..30b0581 100644
--- a/Documentation/devicetree/bindings/regulator/palmas-pmic.txt
+++ b/Documentation/devicetree/bindings/regulator/palmas-pmic.txt
@@ -31,9 +31,8 @@
 	       Optional sub-node properties:
 	       ti,warm-reset - maintain voltage during warm reset(boolean)
 	       ti,roof-floor - control voltage selection by pin(boolean)
-	       ti,sleep-mode - mode to adopt in pmic sleep 0 - off, 1 - auto,
+	       ti,mode-sleep - mode to adopt in pmic sleep 0 - off, 1 - auto,
 	       2 - eco, 3 - forced pwm
-	       ti,tstep - slope control 0 - Jump, 1 10mV/us, 2 5mV/us, 3 2.5mV/us
 	       ti,smps-range - OTP has the wrong range set for the hardware so override
 	       0 - low range, 1 - high range.
 
@@ -59,7 +58,6 @@
 			ti,warm-reset;
 			ti,roof-floor;
 			ti,mode-sleep = <0>;
-			ti,tstep = <0>;
 			ti,smps-range = <1>;
 		};
 
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index d5a79ca..366ce9b 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -26,6 +26,7 @@
 fsl	Freescale Semiconductor
 GEFanuc	GE Fanuc Intelligent Platforms Embedded Systems, Inc.
 gef	GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+hisilicon	Hisilicon Limited.
 hp	Hewlett Packard
 ibm	International Business Machines (IBM)
 idt	Integrated Device Technologies, Inc.
@@ -43,6 +44,7 @@
 onnn	ON Semiconductor Corp.
 picochip	Picochip Ltd
 powervr	PowerVR (deprecated, use img)
+qca	Qualcomm Atheros, Inc.
 qcom	Qualcomm, Inc.
 ralink	Mediatek/Ralink Technology Corp.
 ramtron	Ramtron International
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
index 050d37f..8148a47 100644
--- a/Documentation/ja_JP/HOWTO
+++ b/Documentation/ja_JP/HOWTO
@@ -11,14 +11,14 @@
 fork. So if you have any comments or updates for this file, please try
 to update the original English file first.
 
-Last Updated: 2011/03/31
+Last Updated: 2013/07/19
 ==================================
 これは、
-linux-2.6.38/Documentation/HOWTO
+linux-3.10/Documentation/HOWTO
 の和訳です。
 
-翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
-翻訳日: 2011/3/28
+翻訳団体: JF プロジェクト < http://linuxjf.sourceforge.jp/ >
+翻訳日: 2013/7/19
 翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
 校正者: 松倉さん <nbh--mats at nifty dot com>
          小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
@@ -245,7 +245,7 @@
 自己参照方式で、索引がついた web 形式で、ソースコードを参照することが
 できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり
 ます-
-	http://sosdg.org/~qiyong/lxr/
+	http://lxr.linux.no/+trees
 
 開発プロセス
 -----------------------
@@ -253,24 +253,24 @@
 Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン
 チ」と多数のサブシステム毎のカーネルブランチから構成されます。
 これらのブランチとは-
-  - メインの 2.6.x カーネルツリー
-  - 2.6.x.y -stable カーネルツリー
-  - 2.6.x -git カーネルパッチ
+  - メインの 3.x カーネルツリー
+  - 3.x.y -stable カーネルツリー
+  - 3.x -git カーネルパッチ
   - サブシステム毎のカーネルツリーとパッチ
-  - 統合テストのための 2.6.x -next カーネルツリー
+  - 統合テストのための 3.x -next カーネルツリー
 
-2.6.x カーネルツリー
+3.x カーネルツリー
 -----------------
 
-2.6.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
-の pub/linux/kernel/v2.6/ ディレクトリに存在します。この開発プロセスは
+3.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
+の pub/linux/kernel/v3.x/ ディレクトリに存在します。この開発プロセスは
 以下のとおり-
 
   - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
     この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
     このような差分は通常 -next カーネルに数週間含まれてきたパッチです。
     大きな変更は git(カーネルのソース管理ツール、詳細は
-    http://git-scm.com/  参照) を使って送るのが好ましいやり方ですが、パッ
+    http://git-scm.com/ 参照) を使って送るのが好ましいやり方ですが、パッ
     チファイルの形式のまま送るのでも十分です。
 
   - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定
@@ -302,20 +302,20 @@
   実に認識されたバグの状況によりリリースされるのであり、前もって決めら
   れた計画によってリリースされるものではないからです。」
 
-2.6.x.y -stable カーネルツリー
+3.x.y -stable カーネルツリー
 ---------------------------
 
-バージョン番号が4つの数字に分かれているカーネルは -stable カーネルです。
-これには、2.6.x カーネルで見つかったセキュリティ問題や重大な後戻りに対
+バージョン番号が3つの数字に分かれているカーネルは -stable カーネルです。
+これには、3.x カーネルで見つかったセキュリティ問題や重大な後戻りに対
 する比較的小さい重要な修正が含まれます。
 
 これは、開発/実験的バージョンのテストに協力することに興味が無く、
 最新の安定したカーネルを使いたいユーザに推奨するブランチです。
 
-もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x が
+もし、3.x.y カーネルが存在しない場合には、番号が一番大きい 3.x が
 最新の安定版カーネルです。
 
-2.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必
+3.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必
 要に応じてリリースされます。通常のリリース期間は 2週間毎ですが、差し迫っ
 た問題がなければもう少し長くなることもあります。セキュリティ関連の問題
 の場合はこれに対してだいたいの場合、すぐにリリースがされます。
@@ -324,7 +324,7 @@
 イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ
 リースプロセスがどう動くかが記述されています。
 
-2.6.x -git パッチ
+3.x -git パッチ
 ------------------
 
 git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ
@@ -358,14 +358,14 @@
 をつけることができます。大部分のこれらの patchwork のサイトは
 http://patchwork.kernel.org/ でリストされています。
 
-統合テストのための 2.6.x -next カーネルツリー
+統合テストのための 3.x -next カーネルツリー
 ---------------------------------------------
 
-サブシステムツリーの更新内容がメインラインの 2.6.x ツリーにマージされ
+サブシステムツリーの更新内容がメインラインの 3.x ツリーにマージされ
 る前に、それらは統合テストされる必要があります。この目的のため、実質的
 に全サブシステムツリーからほぼ毎日プルされてできる特別なテスト用のリ
 ポジトリが存在します-
-       http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
+       http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
        http://linux.f-seidel.de/linux-next/pmwiki/
 
 このやり方によって、-next カーネルは次のマージ機会でどんなものがメイン
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 15356ac..7f9d4f5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2953,7 +2953,7 @@
 			improve throughput, but will also increase the
 			amount of memory reserved for use by the client.
 
-	swapaccount[=0|1]
+	swapaccount=[0|1]
 			[KNL] Enable accounting of swap in memory resource
 			controller if no parameter or 1 is given or disable
 			it if 0 is given (See Documentation/cgroups/memory.txt)
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 1c15043..d569f2a 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -52,7 +52,7 @@
 
 busy_read
 ----------------
-Low latency busy poll timeout for socket reads. (needs CONFIG_NET_LL_RX_POLL)
+Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
 Approximate time in us to busy loop waiting for packets on the device queue.
 This sets the default value of the SO_BUSY_POLL socket option.
 Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
@@ -63,7 +63,7 @@
 
 busy_poll
 ----------------
-Low latency busy poll timeout for poll and select. (needs CONFIG_NET_LL_RX_POLL)
+Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL)
 Approximate time in us to busy loop waiting for events.
 Recommended value depends on the number of sockets you poll on.
 For several sockets 50, for several hundreds 100.
diff --git a/MAINTAINERS b/MAINTAINERS
index bf61e04..8197fbd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -965,6 +965,12 @@
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
 
+ARM/TEXAS INSTRUMENT KEYSTONE ARCHITECTURE
+M:	Santosh Shilimkar <santosh.shilimkar@ti.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:	Maintained
+F:	arch/arm/mach-keystone/
+
 ARM/LOGICPD PXA270 MACHINE SUPPORT
 M:	Lennert Buytenhek <kernel@wantstofly.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1259,7 +1265,6 @@
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 
 ARM/Ux500 ARM ARCHITECTURE
-M:	Srinidhi Kasagar <srinidhi.kasagar@stericsson.com>
 M:	Linus Walleij <linus.walleij@linaro.org>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Maintained
@@ -1406,7 +1411,7 @@
 M:	Kalle Valo <kvalo@qca.qualcomm.com>
 L:	linux-wireless@vger.kernel.org
 W:	http://wireless.kernel.org/en/users/Drivers/ath6kl
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath6kl.git
+T:	git git://github.com/kvalo/ath.git
 S:	Supported
 F:	drivers/net/wireless/ath/ath6kl/
 
@@ -1642,7 +1647,7 @@
 F:	drivers/net/hamradio/baycom*
 
 BCACHE (BLOCK LAYER CACHE)
-M:	Kent Overstreet <koverstreet@google.com>
+M:	Kent Overstreet <kmo@daterainc.com>
 L:	linux-bcache@vger.kernel.org
 W:	http://bcache.evilpiepirate.org
 S:	Maintained:
@@ -2871,7 +2876,7 @@
 F:	drivers/media/usb/dvb-usb-v2/usb_urb.c
 
 DYNAMIC DEBUG
-M:	Jason Baron <jbaron@redhat.com>
+M:	Jason Baron <jbaron@akamai.com>
 S:	Maintained
 F:	lib/dynamic_debug.c
 F:	include/linux/dynamic_debug.h
@@ -3346,7 +3351,7 @@
 F:	drivers/base/firmware*.c
 F:	include/linux/firmware.h
 
-FLASHSYSTEM DRIVER (IBM FlashSystem 70/80 PCI SSD Flash Card)
+FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card)
 M:	Joshua Morris <josh.h.morris@us.ibm.com>
 M:	Philip Kelleher <pjk1939@linux.vnet.ibm.com>
 S:	Maintained
@@ -3622,11 +3627,9 @@
 F:	include/uapi/linux/gigaset_dev.h
 
 GPIO SUBSYSTEM
-M:	Grant Likely <grant.likely@linaro.org>
 M:	Linus Walleij <linus.walleij@linaro.org>
 S:	Maintained
 L:	linux-gpio@vger.kernel.org
-T:	git git://git.secretlab.ca/git/linux-2.6.git
 F:	Documentation/gpio.txt
 F:	drivers/gpio/
 F:	include/linux/gpio*
@@ -4472,8 +4475,6 @@
 
 IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
 M:	Benjamin Herrenschmidt <benh@kernel.crashing.org>
-M:	Grant Likely <grant.likely@linaro.org>
-T:	git git://git.secretlab.ca/git/linux-2.6.git irqdomain/next
 S:	Maintained
 F:	Documentation/IRQ-domain.txt
 F:	include/linux/irqdomain.h
@@ -4990,7 +4991,7 @@
 
 LINUX FOR POWERPC EMBEDDED XILINX VIRTEX
 L:	linuxppc-dev@lists.ozlabs.org
-S:	Unmaintained
+S:	Orphan
 F:	arch/powerpc/*/*virtex*
 F:	arch/powerpc/*/*/*virtex*
 
@@ -5580,9 +5581,9 @@
 F:	drivers/media/tuners/mxl5007t.*
 
 MYRICOM MYRI-10G 10GbE DRIVER (MYRI10GE)
-M:	Andrew Gallatin <gallatin@myri.com>
+M:	Hyong-Youb Kim <hykim@myri.com>
 L:	netdev@vger.kernel.org
-W:	http://www.myri.com/scs/download-Myri10GE.html
+W:	https://www.myricom.com/support/downloads/myri10ge.html
 S:	Supported
 F:	drivers/net/ethernet/myricom/myri10ge/
 
@@ -5883,10 +5884,10 @@
 F:	include/linux/i2c-omap.h
 
 OMAP DEVICE TREE SUPPORT
-M:	Benoît Cousson <b-cousson@ti.com>
+M:	Benoît Cousson <bcousson@baylibre.com>
 M:	Tony Lindgren <tony@atomide.com>
 L:	linux-omap@vger.kernel.org
-L:	devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers)
+L:	devicetree@vger.kernel.org
 S:	Maintained
 F:	arch/arm/boot/dts/*omap*
 F:	arch/arm/boot/dts/*am3*
@@ -5963,14 +5964,14 @@
 F:	drivers/char/hw_random/omap-rng.c
 
 OMAP HWMOD SUPPORT
-M:	Benoît Cousson <b-cousson@ti.com>
+M:	Benoît Cousson <bcousson@baylibre.com>
 M:	Paul Walmsley <paul@pwsan.com>
 L:	linux-omap@vger.kernel.org
 S:	Maintained
 F:	arch/arm/mach-omap2/omap_hwmod.*
 
 OMAP HWMOD DATA FOR OMAP4-BASED DEVICES
-M:	Benoît Cousson <b-cousson@ti.com>
+M:	Benoît Cousson <bcousson@baylibre.com>
 L:	linux-omap@vger.kernel.org
 S:	Maintained
 F:	arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -6050,17 +6051,28 @@
 OPEN FIRMWARE AND FLATTENED DEVICE TREE
 M:	Grant Likely <grant.likely@linaro.org>
 M:	Rob Herring <rob.herring@calxeda.com>
-L:	devicetree-discuss@lists.ozlabs.org (moderated for non-subscribers)
+L:	devicetree@vger.kernel.org
 W:	http://fdt.secretlab.ca
 T:	git git://git.secretlab.ca/git/linux-2.6.git
 S:	Maintained
-F:	Documentation/devicetree
-F:	drivers/of
+F:	drivers/of/
 F:	include/linux/of*.h
-F:	scripts/dtc
+F:	scripts/dtc/
 K:	of_get_property
 K:	of_match_table
 
+OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
+M:	Rob Herring <rob.herring@calxeda.com>
+M:	Pawel Moll <pawel.moll@arm.com>
+M:	Mark Rutland <mark.rutland@arm.com>
+M:	Stephen Warren <swarren@wwwdotorg.org>
+M:	Ian Campbell <ian.campbell@citrix.com>
+L:	devicetree@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/
+F:	arch/*/boot/dts/
+F:	include/dt-bindings/
+
 OPENRISC ARCHITECTURE
 M:	Jonas Bonn <jonas@southpole.se>
 W:	http://openrisc.net
@@ -6719,6 +6731,14 @@
 S:	Maintained
 F:	drivers/media/tuners/qt1010*
 
+QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
+M:	Kalle Valo <kvalo@qca.qualcomm.com>
+L:	ath10k@lists.infradead.org
+W:	http://wireless.kernel.org/en/users/Drivers/ath10k
+T:	git git://github.com/kvalo/ath.git
+S:	Supported
+F:	drivers/net/wireless/ath/ath10k/
+
 QUALCOMM HEXAGON ARCHITECTURE
 M:	Richard Kuo <rkuo@codeaurora.org>
 L:	linux-hexagon@vger.kernel.org
@@ -7346,7 +7366,6 @@
 
 SGI GRU DRIVER
 M:	Dimitri Sivanich <sivanich@sgi.com>
-M:	Robin Holt <holt@sgi.com>
 S:	Maintained
 F:	drivers/misc/sgi-gru/
 
@@ -7366,7 +7385,8 @@
 F:	Documentation/sgi-visws.txt
 
 SGI XP/XPC/XPNET DRIVER
-M:	Robin Holt <holt@sgi.com>
+M:	Cliff Whickman <cpw@sgi.com>
+M:	Robin Holt <robinmholt@gmail.com>
 S:	Maintained
 F:	drivers/misc/sgi-xp/
 
@@ -7746,7 +7766,6 @@
 
 SPI SUBSYSTEM
 M:	Mark Brown <broonie@kernel.org>
-M:	Grant Likely <grant.likely@linaro.org>
 L:	linux-spi@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git
 Q:	http://patchwork.kernel.org/project/spi-devel-general/list/
@@ -7812,7 +7831,7 @@
 
 STAGING - COMEDI
 M:	Ian Abbott <abbotti@mev.co.uk>
-M:	Mori Hess <fmhess@users.sourceforge.net>
+M:	H Hartley Sweeten <hsweeten@visionengravers.com>
 S:	Odd Fixes
 F:	drivers/staging/comedi/
 
@@ -8264,7 +8283,7 @@
 F:	sound/soc/codecs/twl4030*
 
 TI WILINK WIRELESS DRIVERS
-M:	Luciano Coelho <coelho@ti.com>
+M:	Luciano Coelho <luca@coelho.fi>
 L:	linux-wireless@vger.kernel.org
 W:	http://wireless.kernel.org/en/users/Drivers/wl12xx
 W:	http://wireless.kernel.org/en/users/Drivers/wl1251
@@ -8650,6 +8669,11 @@
 S:	Maintained
 F:	sound/usb/midi.*
 
+USB NETWORKING DRIVERS
+L:	linux-usb@vger.kernel.org
+S:	Odd Fixes
+F:	drivers/net/usb/
+
 USB OHCI DRIVER
 M:	Alan Stern <stern@rowland.harvard.edu>
 L:	linux-usb@vger.kernel.org
@@ -9288,7 +9312,7 @@
 F:	drivers/net/ethernet/xilinx/xilinx_axienet*
 
 XILINX SYSTEMACE DRIVER
-S:	Unmaintained
+S:	Orphan
 F:	drivers/block/xsysace.c
 
 XILINX UARTLITE SERIAL DRIVER
diff --git a/Makefile b/Makefile
index a35f72a..369882e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 11
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc7
 NAME = Linux for Workgroups
 
 # *DOCUMENTATION*
diff --git a/arch/Kconfig b/arch/Kconfig
index 8d2ae24..1feb169 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -407,6 +407,12 @@
 	help
 	  Architecture has the first two arguments of clone(2) swapped.
 
+config CLONE_BACKWARDS3
+	bool
+	help
+	  Architecture has tls passed as the 3rd argument of clone(2),
+	  not the 5th one.
+
 config ODD_RT_SIGACTION
 	bool
 	help
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 837a1f2..082d9b4 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -15,6 +15,7 @@
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER
diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h
index c2cbe4f..78b03ef 100644
--- a/arch/alpha/include/asm/atomic.h
+++ b/arch/alpha/include/asm/atomic.h
@@ -186,17 +186,24 @@
  */
 static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
 {
-	int c, old;
-	c = atomic_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c;
+	int c, new, old;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldl_l	%[old],%[mem]\n"
+	"	cmpeq	%[old],%[u],%[c]\n"
+	"	addl	%[old],%[a],%[new]\n"
+	"	bne	%[c],2f\n"
+	"	stl_c	%[new],%[mem]\n"
+	"	beq	%[new],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [old] "=&r"(old), [new] "=&r"(new), [c] "=&r"(c)
+	: [mem] "m"(*v), [a] "rI"(a), [u] "rI"((long)u)
+	: "memory");
+	smp_mb();
+	return old;
 }
 
 
@@ -207,21 +214,56 @@
  * @u: ...unless v is equal to u.
  *
  * Atomically adds @a to @v, so long as it was not @u.
- * Returns the old value of @v.
+ * Returns true iff @v was not @u.
  */
 static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
 {
-	long c, old;
-	c = atomic64_read(v);
-	for (;;) {
-		if (unlikely(c == (u)))
-			break;
-		old = atomic64_cmpxchg((v), c, c + (a));
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return c != (u);
+	long c, tmp;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldq_l	%[tmp],%[mem]\n"
+	"	cmpeq	%[tmp],%[u],%[c]\n"
+	"	addq	%[tmp],%[a],%[tmp]\n"
+	"	bne	%[c],2f\n"
+	"	stq_c	%[tmp],%[mem]\n"
+	"	beq	%[tmp],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [tmp] "=&r"(tmp), [c] "=&r"(c)
+	: [mem] "m"(*v), [a] "rI"(a), [u] "rI"(u)
+	: "memory");
+	smp_mb();
+	return !c;
+}
+
+/*
+ * atomic64_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+static inline long atomic64_dec_if_positive(atomic64_t *v)
+{
+	long old, tmp;
+	smp_mb();
+	__asm__ __volatile__(
+	"1:	ldq_l	%[old],%[mem]\n"
+	"	subq	%[old],1,%[tmp]\n"
+	"	ble	%[old],2f\n"
+	"	stq_c	%[tmp],%[mem]\n"
+	"	beq	%[tmp],3f\n"
+	"2:\n"
+	".subsection 2\n"
+	"3:	br	1b\n"
+	".previous"
+	: [old] "=&r"(old), [tmp] "=&r"(tmp)
+	: [mem] "m"(*v)
+	: "memory");
+	smp_mb();
+	return old - 1;
 }
 
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
diff --git a/arch/alpha/include/asm/param.h b/arch/alpha/include/asm/param.h
index bf46af5..a5b68b2 100644
--- a/arch/alpha/include/asm/param.h
+++ b/arch/alpha/include/asm/param.h
@@ -3,7 +3,9 @@
 
 #include <uapi/asm/param.h>
 
-#define HZ		CONFIG_HZ
-#define USER_HZ		HZ
-# define CLOCKS_PER_SEC	HZ	/* frequency at which times() counts */
+# undef HZ
+# define HZ		CONFIG_HZ
+# define USER_HZ	1024
+# define CLOCKS_PER_SEC	USER_HZ	/* frequency at which times() counts */
+
 #endif /* _ASM_ALPHA_PARAM_H */
diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h
index 3bba21e..37b570d 100644
--- a/arch/alpha/include/asm/spinlock.h
+++ b/arch/alpha/include/asm/spinlock.h
@@ -168,8 +168,4 @@
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
 
-#define arch_spin_relax(lock)	cpu_relax()
-#define arch_read_relax(lock)	cpu_relax()
-#define arch_write_relax(lock)	cpu_relax()
-
 #endif /* _ALPHA_SPINLOCK_H */
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 43baee1..f2c9440 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -3,8 +3,7 @@
 
 #include <uapi/asm/unistd.h>
 
-
-#define NR_SYSCALLS			506
+#define NR_SYSCALLS			508
 
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_STAT64
diff --git a/arch/alpha/include/uapi/asm/param.h b/arch/alpha/include/uapi/asm/param.h
index 29daed8..dbcd983 100644
--- a/arch/alpha/include/uapi/asm/param.h
+++ b/arch/alpha/include/uapi/asm/param.h
@@ -1,13 +1,7 @@
 #ifndef _UAPI_ASM_ALPHA_PARAM_H
 #define _UAPI_ASM_ALPHA_PARAM_H
 
-/* ??? Gross.  I don't want to parameterize this, and supposedly the
-   hardware ignores reprogramming.  We also need userland buy-in to the 
-   change in HZ, since this is visible in the wait4 resources etc.  */
-
-#ifndef __KERNEL__
 #define HZ		1024
-#endif
 
 #define EXEC_PAGESIZE	8192
 
@@ -17,5 +11,4 @@
 
 #define MAXHOSTNAMELEN	64	/* max length of hostname */
 
-
 #endif /* _UAPI_ASM_ALPHA_PARAM_H */
diff --git a/arch/alpha/include/uapi/asm/unistd.h b/arch/alpha/include/uapi/asm/unistd.h
index 801d28b..53ae7bb 100644
--- a/arch/alpha/include/uapi/asm/unistd.h
+++ b/arch/alpha/include/uapi/asm/unistd.h
@@ -467,5 +467,7 @@
 #define __NR_sendmmsg			503
 #define __NR_process_vm_readv		504
 #define __NR_process_vm_writev		505
+#define __NR_kcmp			506
+#define __NR_finit_module		507
 
 #endif /* _UAPI_ALPHA_UNISTD_H */
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index f62a994..a969b95 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -12,11 +12,32 @@
 
 	.text
 	.set noat
+	.cfi_sections	.debug_frame
 
 /* Stack offsets.  */
 #define SP_OFF			184
 #define SWITCH_STACK_SIZE	320
 
+.macro	CFI_START_OSF_FRAME	func
+	.align	4
+	.globl	\func
+	.type	\func,@function
+\func:
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa	$sp, 48
+	.cfi_rel_offset	64, 8
+	.cfi_rel_offset	$gp, 16
+	.cfi_rel_offset	$16, 24
+	.cfi_rel_offset	$17, 32
+	.cfi_rel_offset	$18, 40
+.endm
+
+.macro	CFI_END_OSF_FRAME	func
+	.cfi_endproc
+	.size	\func, . - \func
+.endm
+
 /*
  * This defines the normal kernel pt-regs layout.
  *
@@ -27,100 +48,158 @@
  * the palcode-provided values are available to the signal handler.
  */
 
-#define SAVE_ALL			\
-	subq	$sp, SP_OFF, $sp;	\
-	stq	$0, 0($sp);		\
-	stq	$1, 8($sp);		\
-	stq	$2, 16($sp);		\
-	stq	$3, 24($sp);		\
-	stq	$4, 32($sp);		\
-	stq	$28, 144($sp);		\
-	lda	$2, alpha_mv;		\
-	stq	$5, 40($sp);		\
-	stq	$6, 48($sp);		\
-	stq	$7, 56($sp);		\
-	stq	$8, 64($sp);		\
-	stq	$19, 72($sp);		\
-	stq	$20, 80($sp);		\
-	stq	$21, 88($sp);		\
-	ldq	$2, HAE_CACHE($2);	\
-	stq	$22, 96($sp);		\
-	stq	$23, 104($sp);		\
-	stq	$24, 112($sp);		\
-	stq	$25, 120($sp);		\
-	stq	$26, 128($sp);		\
-	stq	$27, 136($sp);		\
-	stq	$2, 152($sp);		\
-	stq	$16, 160($sp);		\
-	stq	$17, 168($sp);		\
+.macro	SAVE_ALL
+	subq	$sp, SP_OFF, $sp
+	.cfi_adjust_cfa_offset	SP_OFF
+	stq	$0, 0($sp)
+	stq	$1, 8($sp)
+	stq	$2, 16($sp)
+	stq	$3, 24($sp)
+	stq	$4, 32($sp)
+	stq	$28, 144($sp)
+	.cfi_rel_offset	$0, 0
+	.cfi_rel_offset $1, 8
+	.cfi_rel_offset	$2, 16
+	.cfi_rel_offset	$3, 24
+	.cfi_rel_offset	$4, 32
+	.cfi_rel_offset	$28, 144
+	lda	$2, alpha_mv
+	stq	$5, 40($sp)
+	stq	$6, 48($sp)
+	stq	$7, 56($sp)
+	stq	$8, 64($sp)
+	stq	$19, 72($sp)
+	stq	$20, 80($sp)
+	stq	$21, 88($sp)
+	ldq	$2, HAE_CACHE($2)
+	stq	$22, 96($sp)
+	stq	$23, 104($sp)
+	stq	$24, 112($sp)
+	stq	$25, 120($sp)
+	stq	$26, 128($sp)
+	stq	$27, 136($sp)
+	stq	$2, 152($sp)
+	stq	$16, 160($sp)
+	stq	$17, 168($sp)
 	stq	$18, 176($sp)
+	.cfi_rel_offset	$5, 40
+	.cfi_rel_offset	$6, 48
+	.cfi_rel_offset	$7, 56
+	.cfi_rel_offset	$8, 64
+	.cfi_rel_offset $19, 72
+	.cfi_rel_offset	$20, 80
+	.cfi_rel_offset	$21, 88
+	.cfi_rel_offset $22, 96
+	.cfi_rel_offset	$23, 104
+	.cfi_rel_offset	$24, 112
+	.cfi_rel_offset	$25, 120
+	.cfi_rel_offset	$26, 128
+	.cfi_rel_offset	$27, 136
+.endm
 
-#define RESTORE_ALL			\
-	lda	$19, alpha_mv;		\
-	ldq	$0, 0($sp);		\
-	ldq	$1, 8($sp);		\
-	ldq	$2, 16($sp);		\
-	ldq	$3, 24($sp);		\
-	ldq	$21, 152($sp);		\
-	ldq	$20, HAE_CACHE($19);	\
-	ldq	$4, 32($sp);		\
-	ldq	$5, 40($sp);		\
-	ldq	$6, 48($sp);		\
-	ldq	$7, 56($sp);		\
-	subq	$20, $21, $20;		\
-	ldq	$8, 64($sp);		\
-	beq	$20, 99f;		\
-	ldq	$20, HAE_REG($19);	\
-	stq	$21, HAE_CACHE($19);	\
-	stq	$21, 0($20);		\
-99:;					\
-	ldq	$19, 72($sp);		\
-	ldq	$20, 80($sp);		\
-	ldq	$21, 88($sp);		\
-	ldq	$22, 96($sp);		\
-	ldq	$23, 104($sp);		\
-	ldq	$24, 112($sp);		\
-	ldq	$25, 120($sp);		\
-	ldq	$26, 128($sp);		\
-	ldq	$27, 136($sp);		\
-	ldq	$28, 144($sp);		\
+.macro	RESTORE_ALL
+	lda	$19, alpha_mv
+	ldq	$0, 0($sp)
+	ldq	$1, 8($sp)
+	ldq	$2, 16($sp)
+	ldq	$3, 24($sp)
+	ldq	$21, 152($sp)
+	ldq	$20, HAE_CACHE($19)
+	ldq	$4, 32($sp)
+	ldq	$5, 40($sp)
+	ldq	$6, 48($sp)
+	ldq	$7, 56($sp)
+	subq	$20, $21, $20
+	ldq	$8, 64($sp)
+	beq	$20, 99f
+	ldq	$20, HAE_REG($19)
+	stq	$21, HAE_CACHE($19)
+	stq	$21, 0($20)
+99:	ldq	$19, 72($sp)
+	ldq	$20, 80($sp)
+	ldq	$21, 88($sp)
+	ldq	$22, 96($sp)
+	ldq	$23, 104($sp)
+	ldq	$24, 112($sp)
+	ldq	$25, 120($sp)
+	ldq	$26, 128($sp)
+	ldq	$27, 136($sp)
+	ldq	$28, 144($sp)
 	addq	$sp, SP_OFF, $sp
+	.cfi_restore	$0
+	.cfi_restore	$1
+	.cfi_restore	$2
+	.cfi_restore	$3
+	.cfi_restore	$4
+	.cfi_restore	$5
+	.cfi_restore	$6
+	.cfi_restore	$7
+	.cfi_restore	$8
+	.cfi_restore	$19
+	.cfi_restore	$20
+	.cfi_restore	$21
+	.cfi_restore	$22
+	.cfi_restore	$23
+	.cfi_restore	$24
+	.cfi_restore	$25
+	.cfi_restore	$26
+	.cfi_restore	$27
+	.cfi_restore	$28
+	.cfi_adjust_cfa_offset	-SP_OFF
+.endm
+
+.macro	DO_SWITCH_STACK
+	bsr	$1, do_switch_stack
+	.cfi_adjust_cfa_offset	SWITCH_STACK_SIZE
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
+	/* We don't really care about the FP registers for debugging.  */
+.endm
+
+.macro	UNDO_SWITCH_STACK
+	bsr	$1, undo_switch_stack
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-SWITCH_STACK_SIZE
+.endm
 
 /*
  * Non-syscall kernel entry points.
  */
 
-	.align	4
-	.globl	entInt
-	.ent	entInt
-entInt:
+CFI_START_OSF_FRAME entInt
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $19
 	jsr	$31, do_entInt
-.end entInt
+CFI_END_OSF_FRAME entInt
 
-	.align	4
-	.globl	entArith
-	.ent	entArith
-entArith:
+CFI_START_OSF_FRAME entArith
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $18
 	jsr	$31, do_entArith
-.end entArith
+CFI_END_OSF_FRAME entArith
 
-	.align	4
-	.globl	entMM
-	.ent	entMM
-entMM:
+CFI_START_OSF_FRAME entMM
 	SAVE_ALL
 /* save $9 - $15 so the inline exception code can manipulate them.  */
 	subq	$sp, 56, $sp
+	.cfi_adjust_cfa_offset	56
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -128,6 +207,13 @@
 	stq	$13, 32($sp)
 	stq	$14, 40($sp)
 	stq	$15, 48($sp)
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
 	addq	$sp, 56, $19
 /* handle the fault */
 	lda	$8, 0x3fff
@@ -142,28 +228,33 @@
 	ldq	$14, 40($sp)
 	ldq	$15, 48($sp)
 	addq	$sp, 56, $sp
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-56
 /* finish up the syscall as normal.  */
 	br	ret_from_sys_call
-.end entMM
+CFI_END_OSF_FRAME entMM
 
-	.align	4
-	.globl	entIF
-	.ent	entIF
-entIF:
+CFI_START_OSF_FRAME entIF
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $17
 	jsr	$31, do_entIF
-.end entIF
+CFI_END_OSF_FRAME entIF
 
-	.align	4
-	.globl	entUna
-	.ent	entUna
-entUna:
+CFI_START_OSF_FRAME entUna
 	lda	$sp, -256($sp)
+	.cfi_adjust_cfa_offset	256
 	stq	$0, 0($sp)
+	.cfi_rel_offset	$0, 0
+	.cfi_remember_state
 	ldq	$0, 256($sp)	/* get PS */
 	stq	$1, 8($sp)
 	stq	$2, 16($sp)
@@ -195,6 +286,32 @@
 	stq	$28, 224($sp)
 	mov	$sp, $19
 	stq	$gp, 232($sp)
+	.cfi_rel_offset	$1, 1*8
+	.cfi_rel_offset	$2, 2*8
+	.cfi_rel_offset	$3, 3*8
+	.cfi_rel_offset	$4, 4*8
+	.cfi_rel_offset	$5, 5*8
+	.cfi_rel_offset	$6, 6*8
+	.cfi_rel_offset	$7, 7*8
+	.cfi_rel_offset	$8, 8*8
+	.cfi_rel_offset	$9, 9*8
+	.cfi_rel_offset	$10, 10*8
+	.cfi_rel_offset	$11, 11*8
+	.cfi_rel_offset	$12, 12*8
+	.cfi_rel_offset	$13, 13*8
+	.cfi_rel_offset	$14, 14*8
+	.cfi_rel_offset	$15, 15*8
+	.cfi_rel_offset	$19, 19*8
+	.cfi_rel_offset	$20, 20*8
+	.cfi_rel_offset	$21, 21*8
+	.cfi_rel_offset	$22, 22*8
+	.cfi_rel_offset	$23, 23*8
+	.cfi_rel_offset	$24, 24*8
+	.cfi_rel_offset	$25, 25*8
+	.cfi_rel_offset	$26, 26*8
+	.cfi_rel_offset	$27, 27*8
+	.cfi_rel_offset	$28, 28*8
+	.cfi_rel_offset	$29, 29*8
 	lda	$8, 0x3fff
 	stq	$31, 248($sp)
 	bic	$sp, $8, $8
@@ -228,16 +345,45 @@
 	ldq	$28, 224($sp)
 	ldq	$gp, 232($sp)
 	lda	$sp, 256($sp)
+	.cfi_restore	$1
+	.cfi_restore	$2
+	.cfi_restore	$3
+	.cfi_restore	$4
+	.cfi_restore	$5
+	.cfi_restore	$6
+	.cfi_restore	$7
+	.cfi_restore	$8
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_restore	$19
+	.cfi_restore	$20
+	.cfi_restore	$21
+	.cfi_restore	$22
+	.cfi_restore	$23
+	.cfi_restore	$24
+	.cfi_restore	$25
+	.cfi_restore	$26
+	.cfi_restore	$27
+	.cfi_restore	$28
+	.cfi_restore	$29
+	.cfi_adjust_cfa_offset	-256
 	call_pal PAL_rti
-.end entUna
 
 	.align	4
-	.ent	entUnaUser
 entUnaUser:
+	.cfi_restore_state
 	ldq	$0, 0($sp)	/* restore original $0 */
 	lda	$sp, 256($sp)	/* pop entUna's stack frame */
+	.cfi_restore	$0
+	.cfi_adjust_cfa_offset	-256
 	SAVE_ALL		/* setup normal kernel stack */
 	lda	$sp, -56($sp)
+	.cfi_adjust_cfa_offset	56
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -245,6 +391,13 @@
 	stq	$13, 32($sp)
 	stq	$14, 40($sp)
 	stq	$15, 48($sp)
+	.cfi_rel_offset	$9, 0
+	.cfi_rel_offset	$10, 8
+	.cfi_rel_offset	$11, 16
+	.cfi_rel_offset	$12, 24
+	.cfi_rel_offset	$13, 32
+	.cfi_rel_offset	$14, 40
+	.cfi_rel_offset	$15, 48
 	lda	$8, 0x3fff
 	addq	$sp, 56, $19
 	bic	$sp, $8, $8
@@ -257,20 +410,25 @@
 	ldq	$14, 40($sp)
 	ldq	$15, 48($sp)
 	lda	$sp, 56($sp)
+	.cfi_restore	$9
+	.cfi_restore	$10
+	.cfi_restore	$11
+	.cfi_restore	$12
+	.cfi_restore	$13
+	.cfi_restore	$14
+	.cfi_restore	$15
+	.cfi_adjust_cfa_offset	-56
 	br	ret_from_sys_call
-.end entUnaUser
+CFI_END_OSF_FRAME entUna
 
-	.align	4
-	.globl	entDbg
-	.ent	entDbg
-entDbg:
+CFI_START_OSF_FRAME entDbg
 	SAVE_ALL
 	lda	$8, 0x3fff
 	lda	$26, ret_from_sys_call
 	bic	$sp, $8, $8
 	mov	$sp, $16
 	jsr	$31, do_entDbg
-.end entDbg
+CFI_END_OSF_FRAME entDbg
 
 /*
  * The system call entry point is special.  Most importantly, it looks
@@ -285,8 +443,12 @@
 
 	.align	4
 	.globl	entSys
-	.globl	ret_from_sys_call
-	.ent	entSys
+	.type	entSys, @function
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa	$sp, 48
+	.cfi_rel_offset	64, 8
+	.cfi_rel_offset	$gp, 16
 entSys:
 	SAVE_ALL
 	lda	$8, 0x3fff
@@ -300,6 +462,9 @@
 	stq	$17, SP_OFF+32($sp)
 	s8addq	$0, $5, $5
 	stq	$18, SP_OFF+40($sp)
+	.cfi_rel_offset	$16, SP_OFF+24
+	.cfi_rel_offset	$17, SP_OFF+32
+	.cfi_rel_offset	$18, SP_OFF+40
 	blbs	$3, strace
 	beq	$4, 1f
 	ldq	$27, 0($5)
@@ -310,6 +475,7 @@
 	stq	$31, 72($sp)		/* a3=0 => no error */
 
 	.align	4
+	.globl	ret_from_sys_call
 ret_from_sys_call:
 	cmovne	$26, 0, $18		/* $18 = 0 => non-restartable */
 	ldq	$0, SP_OFF($sp)
@@ -324,10 +490,12 @@
 	and	$17, _TIF_WORK_MASK, $2
 	bne	$2, work_pending
 restore_all:
+	.cfi_remember_state
 	RESTORE_ALL
 	call_pal PAL_rti
 
 ret_to_kernel:
+	.cfi_restore_state
 	lda	$16, 7
 	call_pal PAL_swpipl
 	br restore_all
@@ -356,7 +524,6 @@
 	stq	$0, 0($sp)
 	stq	$31, 72($sp)	/* a3=0 => no error */
 	br	ret_from_sys_call
-.end entSys
 
 /*
  * Do all cleanup when returning from all interrupts and system calls.
@@ -370,7 +537,7 @@
  */
 
 	.align	4
-	.ent	work_pending
+	.type	work_pending, @function
 work_pending:
 	and	$17, _TIF_NOTIFY_RESUME | _TIF_SIGPENDING, $2
 	bne	$2, $work_notifysig
@@ -387,23 +554,22 @@
 
 $work_notifysig:
 	mov	$sp, $16
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, do_work_pending
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	br	restore_all
-.end work_pending
 
 /*
  * PTRACE syscall handler
  */
 
 	.align	4
-	.ent	strace
+	.type	strace, @function
 strace:
 	/* set up signal stack, call syscall_trace */
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, syscall_trace_enter /* returns the syscall number */
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 
 	/* get the arguments back.. */
 	ldq	$16, SP_OFF+24($sp)
@@ -431,9 +597,9 @@
 $strace_success:
 	stq	$0, 0($sp)		/* save return value */
 
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	jsr	$26, syscall_trace_leave
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	br	$31, ret_from_sys_call
 
 	.align	3
@@ -447,26 +613,31 @@
 	stq	$0, 0($sp)
 	stq	$1, 72($sp)	/* a3 for return */
 
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	mov	$18, $9		/* save old syscall number */
 	mov	$19, $10	/* save old a3 */
 	jsr	$26, syscall_trace_leave
 	mov	$9, $18
 	mov	$10, $19
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 
 	mov	$31, $26	/* tell "ret_from_sys_call" we can restart */
 	br	ret_from_sys_call
-.end strace
+CFI_END_OSF_FRAME entSys
 
 /*
  * Save and restore the switch stack -- aka the balance of the user context.
  */
 
 	.align	4
-	.ent	do_switch_stack
+	.type	do_switch_stack, @function
+	.cfi_startproc simple
+	.cfi_return_column 64
+	.cfi_def_cfa $sp, 0
+	.cfi_register 64, $1
 do_switch_stack:
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
+	.cfi_adjust_cfa_offset	SWITCH_STACK_SIZE
 	stq	$9, 0($sp)
 	stq	$10, 8($sp)
 	stq	$11, 16($sp)
@@ -510,10 +681,14 @@
 	stt	$f0, 312($sp)	# save fpcr in slot of $f31
 	ldt	$f0, 64($sp)	# dont let "do_switch_stack" change fp state.
 	ret	$31, ($1), 1
-.end do_switch_stack
+	.cfi_endproc
+	.size	do_switch_stack, .-do_switch_stack
 
 	.align	4
-	.ent	undo_switch_stack
+	.type	undo_switch_stack, @function
+	.cfi_startproc simple
+	.cfi_def_cfa $sp, 0
+	.cfi_register 64, $1
 undo_switch_stack:
 	ldq	$9, 0($sp)
 	ldq	$10, 8($sp)
@@ -558,7 +733,8 @@
 	ldt	$f30, 304($sp)
 	lda	$sp, SWITCH_STACK_SIZE($sp)
 	ret	$31, ($1), 1
-.end undo_switch_stack
+	.cfi_endproc
+	.size	undo_switch_stack, .-undo_switch_stack
 
 /*
  * The meat of the context switch code.
@@ -566,17 +742,18 @@
 
 	.align	4
 	.globl	alpha_switch_to
-	.ent	alpha_switch_to
+	.type	alpha_switch_to, @function
+	.cfi_startproc
 alpha_switch_to:
-	.prologue 0
-	bsr	$1, do_switch_stack
+	DO_SWITCH_STACK
 	call_pal PAL_swpctx
 	lda	$8, 0x3fff
-	bsr	$1, undo_switch_stack
+	UNDO_SWITCH_STACK
 	bic	$sp, $8, $8
 	mov	$17, $0
 	ret
-.end alpha_switch_to
+	.cfi_endproc
+	.size	alpha_switch_to, .-alpha_switch_to
 
 /*
  * New processes begin life here.
diff --git a/arch/alpha/kernel/irq_alpha.c b/arch/alpha/kernel/irq_alpha.c
index f433fc1..28e4429 100644
--- a/arch/alpha/kernel/irq_alpha.c
+++ b/arch/alpha/kernel/irq_alpha.c
@@ -236,7 +236,7 @@
 init_rtc_irq(void)
 {
 	irq_set_chip_and_handler_name(RTC_IRQ, &dummy_irq_chip,
-				      handle_simple_irq, "RTC");
+				      handle_percpu_irq, "RTC");
 	setup_irq(RTC_IRQ, &timer_irqaction);
 }
 
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 53b18a6..9dbbcb3 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -264,9 +264,10 @@
 		if (cnt <= 0 || cnt >= 80)
 			strcpy(buf, "<<< BOGUS MSG >>>");
 		else {
-			cp1 = (char *) &cpu->ipc_buffer[11];
+			cp1 = (char *) &cpu->ipc_buffer[1];
 			cp2 = buf;
-			strcpy(cp2, cp1);
+			memcpy(cp2, cp1, cnt);
+			cp2[cnt] = '\0';
 			
 			while ((cp2 = strchr(cp2, '\r')) != 0) {
 				*cp2 = ' ';
diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c
index 5bf401f..6c35159 100644
--- a/arch/alpha/kernel/sys_dp264.c
+++ b/arch/alpha/kernel/sys_dp264.c
@@ -190,9 +190,6 @@
 static void
 dp264_device_interrupt(unsigned long vector)
 {
-#if 1
-	printk("dp264_device_interrupt: NOT IMPLEMENTED YET!!\n");
-#else
 	unsigned long pld;
 	unsigned int i;
 
@@ -210,12 +207,7 @@
 			isa_device_interrupt(vector);
 		else
 			handle_irq(16 + i);
-#if 0
-		TSUNAMI_cchip->dir0.csr = 1UL << i; mb();
-		tmp = TSUNAMI_cchip->dir0.csr;
-#endif
 	}
-#endif
 }
 
 static void 
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 407accc..c92e389 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -317,8 +317,9 @@
 }
 
 static int 
-marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin)
+marvel_map_irq(const struct pci_dev *cdev, u8 slot, u8 pin)
 {
+	struct pci_dev *dev = (struct pci_dev *)cdev;
 	struct pci_controller *hose = dev->sysdata;
 	struct io7_port *io7_port = hose->sysdata;
 	struct io7 *io7 = io7_port->io7;
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 4284ec7..dca9b3f 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -524,6 +524,8 @@
 	.quad sys_sendmmsg
 	.quad sys_process_vm_readv
 	.quad sys_process_vm_writev		/* 505 */
+	.quad sys_kcmp
+	.quad sys_finit_module
 
 	.size sys_call_table, . - sys_call_table
 	.type sys_call_table, @object
diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c
index e336694..ea33950 100644
--- a/arch/alpha/kernel/time.c
+++ b/arch/alpha/kernel/time.c
@@ -105,9 +105,7 @@
 
 static inline __u32 rpcc(void)
 {
-    __u32 result;
-    asm volatile ("rpcc %0" : "=r"(result));
-    return result;
+	return __builtin_alpha_rpcc();
 }
 
 int update_persistent_clock(struct timespec now)
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index be1fba3..bd0665c 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -66,8 +66,8 @@
 {
 	printk("pc = [<%016lx>]  ra = [<%016lx>]  ps = %04lx    %s\n",
 	       regs->pc, regs->r26, regs->ps, print_tainted());
-	print_symbol("pc is at %s\n", regs->pc);
-	print_symbol("ra is at %s\n", regs->r26 );
+	printk("pc is at %pSR\n", (void *)regs->pc);
+	printk("ra is at %pSR\n", (void *)regs->r26);
 	printk("v0 = %016lx  t0 = %016lx  t1 = %016lx\n",
 	       regs->r0, regs->r1, regs->r2);
 	printk("t2 = %016lx  t3 = %016lx  t4 = %016lx\n",
@@ -132,9 +132,7 @@
 			continue;
 		if (tmp >= (unsigned long) &_etext)
 			continue;
-		printk("[<%lx>]", tmp);
-		print_symbol(" %s", tmp);
-		printk("\n");
+		printk("[<%lx>] %pSR\n", tmp, (void *)tmp);
 		if (i > 40) {
 			printk(" ...");
 			break;
diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h
index 8943c02..df57611 100644
--- a/arch/arc/include/asm/entry.h
+++ b/arch/arc/include/asm/entry.h
@@ -38,6 +38,7 @@
 #include <asm/ptrace.h>
 #include <asm/processor.h>	/* For VMALLOC_START */
 #include <asm/thread_info.h>	/* For THREAD_SIZE */
+#include <asm/mmu.h>
 
 /* Note on the LD/ST addr modes with addr reg wback
  *
diff --git a/arch/arc/lib/strchr-700.S b/arch/arc/lib/strchr-700.S
index 99c1047..9c548c7 100644
--- a/arch/arc/lib/strchr-700.S
+++ b/arch/arc/lib/strchr-700.S
@@ -39,9 +39,18 @@
 	ld.a	r2,[r0,4]
 	sub	r12,r6,r7
 	bic	r12,r12,r6
+#ifdef __LITTLE_ENDIAN__
 	and	r7,r12,r4
 	breq	r7,0,.Loop ; For speed, we want this branch to be unaligned.
 	b	.Lfound_char ; Likewise this one.
+#else
+	and	r12,r12,r4
+	breq	r12,0,.Loop ; For speed, we want this branch to be unaligned.
+	lsr_s	r12,r12,7
+	bic 	r2,r7,r6
+	b.d	.Lfound_char_b
+	and_s	r2,r2,r12
+#endif
 ; /* We require this code address to be unaligned for speed...  */
 .Laligned:
 	ld_s	r2,[r0]
@@ -95,6 +104,7 @@
 	lsr	r7,r7,7
 
 	bic	r2,r7,r6
+.Lfound_char_b:
 	norm	r2,r2
 	sub_s	r0,r0,4
 	asr_s	r2,r2,3
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index ba412e0..43594d5 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -20,7 +20,6 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select HARDIRQS_SW_RESEND
-	select HAVE_AOUT
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
@@ -218,7 +217,8 @@
 	default DRAM_BASE if REMAP_VECTORS_TO_RAM
 	default 0x00000000
 	help
-	  The base address of exception vectors.
+	  The base address of exception vectors.  This must be two pages
+	  in size.
 
 config ARM_PATCH_PHYS_VIRT
 	bool "Patch physical to virtual translations at runtime" if EMBEDDED
@@ -1600,8 +1600,7 @@
 config ARCH_NR_GPIO
 	int
 	default 1024 if ARCH_SHMOBILE || ARCH_TEGRA
-	default 512 if SOC_OMAP5
-	default 512 if ARCH_KEYSTONE
+	default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5
 	default 392 if ARCH_U8500
 	default 352 if ARCH_VT8500
 	default 288 if ARCH_SUNXI
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index e401a76..583f4a0 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -804,9 +804,19 @@
 
 config DEBUG_UNCOMPRESS
 	bool
-	default y if ARCH_MULTIPLATFORM && DEBUG_LL && \
-		     !DEBUG_OMAP2PLUS_UART && \
+	depends on ARCH_MULTIPLATFORM
+	default y if DEBUG_LL && !DEBUG_OMAP2PLUS_UART && \
 		     !DEBUG_TEGRA_UART
+	help
+	  This option influences the normal decompressor output for
+	  multiplatform kernels.  Normally, multiplatform kernels disable
+	  decompressor output because it is not possible to know where to
+	  send the decompressor output.
+
+	  When this option is set, the selected DEBUG_LL output method
+	  will be re-used for normal decompressor output on multiplatform
+	  kernels.
+	  
 
 config UNCOMPRESS_INCLUDE
 	string
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index c0ac0f5..6fd2cea 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -153,6 +153,7 @@
 machine-$(CONFIG_ARCH_DOVE)		+= dove
 machine-$(CONFIG_ARCH_EBSA110)		+= ebsa110
 machine-$(CONFIG_ARCH_EP93XX)		+= ep93xx
+machine-$(CONFIG_ARCH_EXYNOS)		+= exynos
 machine-$(CONFIG_ARCH_GEMINI)		+= gemini
 machine-$(CONFIG_ARCH_HIGHBANK)		+= highbank
 machine-$(CONFIG_ARCH_INTEGRATOR)	+= integrator
@@ -160,15 +161,16 @@
 machine-$(CONFIG_ARCH_IOP32X)		+= iop32x
 machine-$(CONFIG_ARCH_IOP33X)		+= iop33x
 machine-$(CONFIG_ARCH_IXP4XX)		+= ixp4xx
+machine-$(CONFIG_ARCH_KEYSTONE)		+= keystone
 machine-$(CONFIG_ARCH_KIRKWOOD)		+= kirkwood
 machine-$(CONFIG_ARCH_KS8695)		+= ks8695
 machine-$(CONFIG_ARCH_LPC32XX)		+= lpc32xx
 machine-$(CONFIG_ARCH_MMP)		+= mmp
 machine-$(CONFIG_ARCH_MSM)		+= msm
 machine-$(CONFIG_ARCH_MV78XX0)		+= mv78xx0
+machine-$(CONFIG_ARCH_MVEBU)		+= mvebu
 machine-$(CONFIG_ARCH_MXC)		+= imx
 machine-$(CONFIG_ARCH_MXS)		+= mxs
-machine-$(CONFIG_ARCH_MVEBU)		+= mvebu
 machine-$(CONFIG_ARCH_NETX)		+= netx
 machine-$(CONFIG_ARCH_NOMADIK)		+= nomadik
 machine-$(CONFIG_ARCH_NSPIRE)		+= nspire
@@ -176,7 +178,6 @@
 machine-$(CONFIG_ARCH_OMAP2PLUS)	+= omap2
 machine-$(CONFIG_ARCH_ORION5X)		+= orion5x
 machine-$(CONFIG_ARCH_PICOXCELL)	+= picoxcell
-machine-$(CONFIG_ARCH_SIRF)		+= prima2
 machine-$(CONFIG_ARCH_PXA)		+= pxa
 machine-$(CONFIG_ARCH_REALVIEW)		+= realview
 machine-$(CONFIG_ARCH_ROCKCHIP)		+= rockchip
@@ -186,25 +187,24 @@
 machine-$(CONFIG_ARCH_S5P64X0)		+= s5p64x0
 machine-$(CONFIG_ARCH_S5PC100)		+= s5pc100
 machine-$(CONFIG_ARCH_S5PV210)		+= s5pv210
-machine-$(CONFIG_ARCH_EXYNOS)		+= exynos
 machine-$(CONFIG_ARCH_SA1100)		+= sa1100
 machine-$(CONFIG_ARCH_SHARK)		+= shark
 machine-$(CONFIG_ARCH_SHMOBILE) 	+= shmobile
+machine-$(CONFIG_ARCH_SIRF)		+= prima2
+machine-$(CONFIG_ARCH_SOCFPGA)		+= socfpga
+machine-$(CONFIG_ARCH_STI)		+= sti
+machine-$(CONFIG_ARCH_SUNXI)		+= sunxi
 machine-$(CONFIG_ARCH_TEGRA)		+= tegra
 machine-$(CONFIG_ARCH_U300)		+= u300
 machine-$(CONFIG_ARCH_U8500)		+= ux500
 machine-$(CONFIG_ARCH_VERSATILE)	+= versatile
 machine-$(CONFIG_ARCH_VEXPRESS)		+= vexpress
+machine-$(CONFIG_ARCH_VIRT)		+= virt
 machine-$(CONFIG_ARCH_VT8500)		+= vt8500
 machine-$(CONFIG_ARCH_W90X900)		+= w90x900
-machine-$(CONFIG_FOOTBRIDGE)		+= footbridge
-machine-$(CONFIG_ARCH_SOCFPGA)		+= socfpga
-machine-$(CONFIG_PLAT_SPEAR)		+= spear
-machine-$(CONFIG_ARCH_STI)		+= sti
-machine-$(CONFIG_ARCH_VIRT)		+= virt
 machine-$(CONFIG_ARCH_ZYNQ)		+= zynq
-machine-$(CONFIG_ARCH_SUNXI)		+= sunxi
-machine-$(CONFIG_ARCH_KEYSTONE)		+= keystone
+machine-$(CONFIG_FOOTBRIDGE)		+= footbridge
+machine-$(CONFIG_PLAT_SPEAR)		+= spear
 
 # Platform directory name.  This list is sorted alphanumerically
 # by CONFIG_* macro name.
diff --git a/arch/arm/boot/dts/at91sam9n12ek.dts b/arch/arm/boot/dts/at91sam9n12ek.dts
index d59b70c..3d77dbe 100644
--- a/arch/arm/boot/dts/at91sam9n12ek.dts
+++ b/arch/arm/boot/dts/at91sam9n12ek.dts
@@ -14,11 +14,11 @@
 	compatible = "atmel,at91sam9n12ek", "atmel,at91sam9n12", "atmel,at91sam9";
 
 	chosen {
-		bootargs = "mem=128M console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2";
+		bootargs = "console=ttyS0,115200 root=/dev/mtdblock1 rw rootfstype=jffs2";
 	};
 
 	memory {
-		reg = <0x20000000 0x10000000>;
+		reg = <0x20000000 0x8000000>;
 	};
 
 	clocks {
diff --git a/arch/arm/boot/dts/at91sam9x5ek.dtsi b/arch/arm/boot/dts/at91sam9x5ek.dtsi
index b753855..49e3c45 100644
--- a/arch/arm/boot/dts/at91sam9x5ek.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5ek.dtsi
@@ -94,8 +94,9 @@
 
 		usb0: ohci@00600000 {
 			status = "okay";
-			num-ports = <2>;
-			atmel,vbus-gpio = <&pioD 19 GPIO_ACTIVE_LOW
+			num-ports = <3>;
+			atmel,vbus-gpio = <0 /* &pioD 18 GPIO_ACTIVE_LOW *//* Activate to have access to port A */
+					   &pioD 19 GPIO_ACTIVE_LOW
 					   &pioD 20 GPIO_ACTIVE_LOW
 					  >;
 		};
diff --git a/arch/arm/boot/dts/atlas6.dtsi b/arch/arm/boot/dts/atlas6.dtsi
index 9866cd7..a0f2721 100644
--- a/arch/arm/boot/dts/atlas6.dtsi
+++ b/arch/arm/boot/dts/atlas6.dtsi
@@ -485,6 +485,12 @@
                                                 sirf,function = "usp0";
                                         };
                                 };
+				usp0_uart_nostreamctrl_pins_a: usp0@1 {
+                                        usp0 {
+                                                sirf,pins = "usp0_uart_nostreamctrl_grp";
+                                                sirf,function = "usp0_uart_nostreamctrl";
+                                        };
+                                };
                                 usp1_pins_a: usp1@0 {
                                         usp1 {
                                                 sirf,pins = "usp1grp";
@@ -515,16 +521,16 @@
                                                 sirf,function = "pulse_count";
                                         };
                                 };
-                                cko0_rst_pins_a: cko0_rst@0 {
-                                        cko0_rst {
-                                                sirf,pins = "cko0_rstgrp";
-                                                sirf,function = "cko0_rst";
+                                cko0_pins_a: cko0@0 {
+                                        cko0 {
+                                                sirf,pins = "cko0grp";
+                                                sirf,function = "cko0";
                                         };
                                 };
-                                cko1_rst_pins_a: cko1_rst@0 {
-                                        cko1_rst {
-                                                sirf,pins = "cko1_rstgrp";
-                                                sirf,function = "cko1_rst";
+                                cko1_pins_a: cko1@0 {
+                                        cko1 {
+                                                sirf,pins = "cko1grp";
+                                                sirf,function = "cko1";
                                         };
                                 };
 			};
diff --git a/arch/arm/boot/dts/imx28-apx4devkit.dts b/arch/arm/boot/dts/imx28-apx4devkit.dts
index 43bf3c7..0e7fed4 100644
--- a/arch/arm/boot/dts/imx28-apx4devkit.dts
+++ b/arch/arm/boot/dts/imx28-apx4devkit.dts
@@ -147,7 +147,7 @@
 					reg = <0x0a>;
 					VDDA-supply = <&reg_3p3v>;
 					VDDIO-supply = <&reg_3p3v>;
-
+					clocks = <&saif0>;
 				};
 
 				pcf8563: rtc@51 {
diff --git a/arch/arm/boot/dts/imx28-evk.dts b/arch/arm/boot/dts/imx28-evk.dts
index 1f0d38d..e035f46 100644
--- a/arch/arm/boot/dts/imx28-evk.dts
+++ b/arch/arm/boot/dts/imx28-evk.dts
@@ -195,7 +195,7 @@
 					reg = <0x0a>;
 					VDDA-supply = <&reg_3p3v>;
 					VDDIO-supply = <&reg_3p3v>;
-
+					clocks = <&saif0>;
 				};
 
 				at24@51 {
diff --git a/arch/arm/boot/dts/imx28-m28evk.dts b/arch/arm/boot/dts/imx28-m28evk.dts
index 880df2f..44d9da5 100644
--- a/arch/arm/boot/dts/imx28-m28evk.dts
+++ b/arch/arm/boot/dts/imx28-m28evk.dts
@@ -184,7 +184,7 @@
 					reg = <0x0a>;
 					VDDA-supply = <&reg_3p3v>;
 					VDDIO-supply = <&reg_3p3v>;
-
+					clocks = <&saif0>;
 				};
 
 				eeprom: eeprom@51 {
diff --git a/arch/arm/boot/dts/imx28.dtsi b/arch/arm/boot/dts/imx28.dtsi
index 6a8acb0..9524a05 100644
--- a/arch/arm/boot/dts/imx28.dtsi
+++ b/arch/arm/boot/dts/imx28.dtsi
@@ -837,6 +837,7 @@
 				compatible = "fsl,imx28-saif";
 				reg = <0x80042000 0x2000>;
 				interrupts = <59 80>;
+				#clock-cells = <0>;
 				clocks = <&clks 53>;
 				dmas = <&dma_apbx 4>;
 				dma-names = "rx-tx";
diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts
index 6dd9486..ad3471c 100644
--- a/arch/arm/boot/dts/imx51-babbage.dts
+++ b/arch/arm/boot/dts/imx51-babbage.dts
@@ -61,6 +61,16 @@
 		mux-int-port = <2>;
 		mux-ext-port = <3>;
 	};
+
+	clocks {
+		clk_26M: codec_clock {
+			compatible = "fixed-clock";
+			reg=<0>;
+			#clock-cells = <0>;
+			clock-frequency = <26000000>;
+			gpios = <&gpio4 26 1>;
+		};
+	};
 };
 
 &esdhc1 {
@@ -229,6 +239,7 @@
 				MX51_PAD_EIM_A27__GPIO2_21   0x5
 				MX51_PAD_CSPI1_SS0__GPIO4_24 0x85
 				MX51_PAD_CSPI1_SS1__GPIO4_25 0x85
+				MX51_PAD_CSPI1_RDY__GPIO4_26 0x80000000
 			>;
 		};
 	};
@@ -255,7 +266,7 @@
 	sgtl5000: codec@0a {
 		compatible = "fsl,sgtl5000";
 		reg = <0x0a>;
-		clock-frequency = <26000000>;
+		clocks = <&clk_26M>;
 		VDDA-supply = <&vdig_reg>;
 		VDDIO-supply = <&vvideo_reg>;
 	};
diff --git a/arch/arm/boot/dts/imx53-mba53.dts b/arch/arm/boot/dts/imx53-mba53.dts
index aaa33bc..a630902 100644
--- a/arch/arm/boot/dts/imx53-mba53.dts
+++ b/arch/arm/boot/dts/imx53-mba53.dts
@@ -27,7 +27,7 @@
 
 	backlight {
 		compatible = "pwm-backlight";
-		pwms = <&pwm2 0 50000 0 0>;
+		pwms = <&pwm2 0 50000>;
 		brightness-levels = <0 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 84 88 92 96 100>;
 		default-brightness-level = <10>;
 		enable-gpios = <&gpio7 7 0>;
diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi
index 3895fbb..569aa9f 100644
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@ -725,15 +725,15 @@
 				uart1 {
 					pinctrl_uart1_1: uart1grp-1 {
 						fsl,pins = <
-							MX53_PAD_CSI0_DAT10__UART1_TXD_MUX 0x1c5
-							MX53_PAD_CSI0_DAT11__UART1_RXD_MUX 0x1c5
+							MX53_PAD_CSI0_DAT10__UART1_TXD_MUX 0x1e4
+							MX53_PAD_CSI0_DAT11__UART1_RXD_MUX 0x1e4
 						>;
 					};
 
 					pinctrl_uart1_2: uart1grp-2 {
 						fsl,pins = <
-							MX53_PAD_PATA_DIOW__UART1_TXD_MUX  0x1c5
-							MX53_PAD_PATA_DMACK__UART1_RXD_MUX 0x1c5
+							MX53_PAD_PATA_DIOW__UART1_TXD_MUX  0x1e4
+							MX53_PAD_PATA_DMACK__UART1_RXD_MUX 0x1e4
 						>;
 					};
 
@@ -748,8 +748,8 @@
 				uart2 {
 					pinctrl_uart2_1: uart2grp-1 {
 						fsl,pins = <
-							MX53_PAD_PATA_BUFFER_EN__UART2_RXD_MUX 0x1c5
-							MX53_PAD_PATA_DMARQ__UART2_TXD_MUX     0x1c5
+							MX53_PAD_PATA_BUFFER_EN__UART2_RXD_MUX 0x1e4
+							MX53_PAD_PATA_DMARQ__UART2_TXD_MUX     0x1e4
 						>;
 					};
 
@@ -766,17 +766,17 @@
 				uart3 {
 					pinctrl_uart3_1: uart3grp-1 {
 						fsl,pins = <
-							MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1c5
-							MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1c5
-							MX53_PAD_PATA_DA_1__UART3_CTS	  0x1c5
-							MX53_PAD_PATA_DA_2__UART3_RTS	  0x1c5
+							MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1e4
+							MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1e4
+							MX53_PAD_PATA_DA_1__UART3_CTS	  0x1e4
+							MX53_PAD_PATA_DA_2__UART3_RTS	  0x1e4
 						>;
 					};
 
 					pinctrl_uart3_2: uart3grp-2 {
 						fsl,pins = <
-							MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1c5
-							MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1c5
+							MX53_PAD_PATA_CS_0__UART3_TXD_MUX 0x1e4
+							MX53_PAD_PATA_CS_1__UART3_RXD_MUX 0x1e4
 						>;
 					};
 
@@ -785,8 +785,8 @@
 				uart4 {
 					pinctrl_uart4_1: uart4grp-1 {
 						fsl,pins = <
-							MX53_PAD_KEY_COL0__UART4_TXD_MUX 0x1c5
-							MX53_PAD_KEY_ROW0__UART4_RXD_MUX 0x1c5
+							MX53_PAD_KEY_COL0__UART4_TXD_MUX 0x1e4
+							MX53_PAD_KEY_ROW0__UART4_RXD_MUX 0x1e4
 						>;
 					};
 				};
@@ -794,8 +794,8 @@
 				uart5 {
 					pinctrl_uart5_1: uart5grp-1 {
 						fsl,pins = <
-							MX53_PAD_KEY_COL1__UART5_TXD_MUX 0x1c5
-							MX53_PAD_KEY_ROW1__UART5_RXD_MUX 0x1c5
+							MX53_PAD_KEY_COL1__UART5_TXD_MUX 0x1e4
+							MX53_PAD_KEY_ROW1__UART5_RXD_MUX 0x1e4
 						>;
 					};
 				};
diff --git a/arch/arm/boot/dts/msm8960-cdp.dts b/arch/arm/boot/dts/msm8960-cdp.dts
index db2060c..9c1167b0 100644
--- a/arch/arm/boot/dts/msm8960-cdp.dts
+++ b/arch/arm/boot/dts/msm8960-cdp.dts
@@ -26,7 +26,7 @@
 		cpu-offset = <0x80000>;
 	};
 
-	msmgpio: gpio@fd510000 {
+	msmgpio: gpio@800000 {
 		compatible = "qcom,msm-gpio";
 		gpio-controller;
 		#gpio-cells = <2>;
@@ -34,7 +34,7 @@
 		interrupts = <0 32 0x4>;
 		interrupt-controller;
 		#interrupt-cells = <2>;
-		reg = <0xfd510000 0x4000>;
+		reg = <0x800000 0x4000>;
 	};
 
 	serial@16440000 {
diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts
index 08b7267..65d7b60 100644
--- a/arch/arm/boot/dts/omap5-uevm.dts
+++ b/arch/arm/boot/dts/omap5-uevm.dts
@@ -235,7 +235,7 @@
 };
 
 &mmc1 {
-	vmmc-supply = <&vmmcsd_fixed>;
+	vmmc-supply = <&ldo9_reg>;
 	bus-width = <4>;
 };
 
@@ -282,6 +282,7 @@
 
 			regulators {
 				smps123_reg: smps123 {
+					/* VDD_OPP_MPU */
 					regulator-name = "smps123";
 					regulator-min-microvolt = < 600000>;
 					regulator-max-microvolt = <1500000>;
@@ -290,6 +291,7 @@
 				};
 
 				smps45_reg: smps45 {
+					/* VDD_OPP_MM */
 					regulator-name = "smps45";
 					regulator-min-microvolt = < 600000>;
 					regulator-max-microvolt = <1310000>;
@@ -298,6 +300,7 @@
 				};
 
 				smps6_reg: smps6 {
+					/* VDD_DDR3 - over VDD_SMPS6 */
 					regulator-name = "smps6";
 					regulator-min-microvolt = <1200000>;
 					regulator-max-microvolt = <1200000>;
@@ -306,6 +309,7 @@
 				};
 
 				smps7_reg: smps7 {
+					/* VDDS_1v8_OMAP over VDDS_1v8_MAIN */
 					regulator-name = "smps7";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <1800000>;
@@ -314,6 +318,7 @@
 				};
 
 				smps8_reg: smps8 {
+					/* VDD_OPP_CORE */
 					regulator-name = "smps8";
 					regulator-min-microvolt = < 600000>;
 					regulator-max-microvolt = <1310000>;
@@ -322,15 +327,15 @@
 				};
 
 				smps9_reg: smps9 {
+					/* VDDA_2v1_AUD over VDD_2v1 */
 					regulator-name = "smps9";
 					regulator-min-microvolt = <2100000>;
 					regulator-max-microvolt = <2100000>;
-					regulator-always-on;
-					regulator-boot-on;
 					ti,smps-range = <0x80>;
 				};
 
 				smps10_reg: smps10 {
+					/* VBUS_5V_OTG */
 					regulator-name = "smps10";
 					regulator-min-microvolt = <5000000>;
 					regulator-max-microvolt = <5000000>;
@@ -339,38 +344,40 @@
 				};
 
 				ldo1_reg: ldo1 {
+					/* VDDAPHY_CAM: vdda_csiport */
 					regulator-name = "ldo1";
-					regulator-min-microvolt = <2800000>;
-					regulator-max-microvolt = <2800000>;
-					regulator-always-on;
-					regulator-boot-on;
+					regulator-min-microvolt = <1500000>;
+					regulator-max-microvolt = <1800000>;
 				};
 
 				ldo2_reg: ldo2 {
+					/* VCC_2V8_DISP: Does not go anywhere */
 					regulator-name = "ldo2";
-					regulator-min-microvolt = <2900000>;
-					regulator-max-microvolt = <2900000>;
-					regulator-always-on;
-					regulator-boot-on;
+					regulator-min-microvolt = <2800000>;
+					regulator-max-microvolt = <2800000>;
+					/* Unused */
+					status = "disabled";
 				};
 
 				ldo3_reg: ldo3 {
+					/* VDDAPHY_MDM: vdda_lli */
 					regulator-name = "ldo3";
-					regulator-min-microvolt = <3000000>;
-					regulator-max-microvolt = <3000000>;
-					regulator-always-on;
+					regulator-min-microvolt = <1500000>;
+					regulator-max-microvolt = <1500000>;
 					regulator-boot-on;
+					/* Only if Modem is used */
+					status = "disabled";
 				};
 
 				ldo4_reg: ldo4 {
+					/* VDDAPHY_DISP: vdda_dsiport/hdmi */
 					regulator-name = "ldo4";
-					regulator-min-microvolt = <2200000>;
-					regulator-max-microvolt = <2200000>;
-					regulator-always-on;
-					regulator-boot-on;
+					regulator-min-microvolt = <1500000>;
+					regulator-max-microvolt = <1800000>;
 				};
 
 				ldo5_reg: ldo5 {
+					/* VDDA_1V8_PHY: usb/sata/hdmi.. */
 					regulator-name = "ldo5";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <1800000>;
@@ -379,38 +386,43 @@
 				};
 
 				ldo6_reg: ldo6 {
+					/* VDDS_1V2_WKUP: hsic/ldo_emu_wkup */
 					regulator-name = "ldo6";
-					regulator-min-microvolt = <1500000>;
-					regulator-max-microvolt = <1500000>;
+					regulator-min-microvolt = <1200000>;
+					regulator-max-microvolt = <1200000>;
 					regulator-always-on;
 					regulator-boot-on;
 				};
 
 				ldo7_reg: ldo7 {
+					/* VDD_VPP: vpp1 */
 					regulator-name = "ldo7";
-					regulator-min-microvolt = <1500000>;
-					regulator-max-microvolt = <1500000>;
-					regulator-always-on;
-					regulator-boot-on;
+					regulator-min-microvolt = <2000000>;
+					regulator-max-microvolt = <2000000>;
+					/* Only for efuse reprograming! */
+					status = "disabled";
 				};
 
 				ldo8_reg: ldo8 {
+					/* VDD_3v0: Does not go anywhere */
 					regulator-name = "ldo8";
-					regulator-min-microvolt = <1500000>;
-					regulator-max-microvolt = <1500000>;
-					regulator-always-on;
+					regulator-min-microvolt = <3000000>;
+					regulator-max-microvolt = <3000000>;
 					regulator-boot-on;
+					/* Unused */
+					status = "disabled";
 				};
 
 				ldo9_reg: ldo9 {
+					/* VCC_DV_SDIO: vdds_sdcard */
 					regulator-name = "ldo9";
 					regulator-min-microvolt = <1800000>;
-					regulator-max-microvolt = <3300000>;
-					regulator-always-on;
+					regulator-max-microvolt = <3000000>;
 					regulator-boot-on;
 				};
 
 				ldoln_reg: ldoln {
+					/* VDDA_1v8_REF: vdds_osc/mm_l4per.. */
 					regulator-name = "ldoln";
 					regulator-min-microvolt = <1800000>;
 					regulator-max-microvolt = <1800000>;
@@ -419,12 +431,20 @@
 				};
 
 				ldousb_reg: ldousb {
+					/* VDDA_3V_USB: VDDA_USBHS33 */
 					regulator-name = "ldousb";
 					regulator-min-microvolt = <3250000>;
 					regulator-max-microvolt = <3250000>;
 					regulator-always-on;
 					regulator-boot-on;
 				};
+
+				regen3_reg: regen3 {
+					/* REGEN3 controls LDO9 supply to card */
+					regulator-name = "regen3";
+					regulator-always-on;
+					regulator-boot-on;
+				};
 			};
 		};
 	};
diff --git a/arch/arm/boot/dts/prima2.dtsi b/arch/arm/boot/dts/prima2.dtsi
index 05e9489..bbeb623 100644
--- a/arch/arm/boot/dts/prima2.dtsi
+++ b/arch/arm/boot/dts/prima2.dtsi
@@ -515,16 +515,16 @@
                                                 sirf,function = "pulse_count";
                                         };
                                 };
-                                cko0_rst_pins_a: cko0_rst@0 {
-                                        cko0_rst {
-                                                sirf,pins = "cko0_rstgrp";
-                                                sirf,function = "cko0_rst";
+                                cko0_pins_a: cko0@0 {
+                                        cko0 {
+                                                sirf,pins = "cko0grp";
+                                                sirf,function = "cko0";
                                         };
                                 };
-                                cko1_rst_pins_a: cko1_rst@0 {
-                                        cko1_rst {
-                                                sirf,pins = "cko1_rstgrp";
-                                                sirf,function = "cko1_rst";
+                                cko1_pins_a: cko1@0 {
+                                        cko1 {
+                                                sirf,pins = "cko1grp";
+                                                sirf,function = "cko1";
                                         };
                                 };
 			};
diff --git a/arch/arm/boot/dts/stih416-pinctrl.dtsi b/arch/arm/boot/dts/stih416-pinctrl.dtsi
index 957b21a..0f246c9 100644
--- a/arch/arm/boot/dts/stih416-pinctrl.dtsi
+++ b/arch/arm/boot/dts/stih416-pinctrl.dtsi
@@ -166,6 +166,15 @@
 				reg		= <0x9000 0x100>;
 				st,bank-name	= "PIO31";
 			};
+
+			serial2-oe {
+				pinctrl_serial2_oe: serial2-1 {
+					st,pins {
+						output-enable	= <&PIO11 3 ALT2 OUT>;
+					};
+				};
+			};
+
 		};
 
 		pin-controller-rear {
@@ -218,7 +227,6 @@
 					st,pins {
 						tx	= <&PIO17 4 ALT2 OUT>;
 						rx	= <&PIO17 5 ALT2 IN>;
-						output-enable	= <&PIO11 3 ALT2 OUT>;
 					};
 				};
 			};
diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi
index 3cecd96..1a0326e 100644
--- a/arch/arm/boot/dts/stih416.dtsi
+++ b/arch/arm/boot/dts/stih416.dtsi
@@ -79,7 +79,7 @@
 			interrupts	= <0 197 0>;
 			clocks          = <&CLK_S_ICN_REG_0>;
 			pinctrl-names 	= "default";
-			pinctrl-0 	= <&pinctrl_serial2>;
+			pinctrl-0 	= <&pinctrl_serial2 &pinctrl_serial2_oe>;
 		};
 
 		/* SBC_UART1 */
diff --git a/arch/arm/boot/dts/stih41x.dtsi b/arch/arm/boot/dts/stih41x.dtsi
index 7321403..f5b9898 100644
--- a/arch/arm/boot/dts/stih41x.dtsi
+++ b/arch/arm/boot/dts/stih41x.dtsi
@@ -6,10 +6,12 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 		cpu@0 {
+			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <0>;
 		};
 		cpu@1 {
+			device_type = "cpu";
 			compatible = "arm,cortex-a9";
 			reg = <1>;
 		};
diff --git a/arch/arm/boot/dts/tegra20-colibri-512.dtsi b/arch/arm/boot/dts/tegra20-colibri-512.dtsi
index 2fcb3f2..5592be6 100644
--- a/arch/arm/boot/dts/tegra20-colibri-512.dtsi
+++ b/arch/arm/boot/dts/tegra20-colibri-512.dtsi
@@ -457,6 +457,7 @@
 	};
 
 	usb-phy@c5004000 {
+		status = "okay";
 		nvidia,phy-reset-gpio = <&gpio TEGRA_GPIO(V, 1)
 			GPIO_ACTIVE_LOW>;
 	};
diff --git a/arch/arm/boot/dts/tegra20-seaboard.dts b/arch/arm/boot/dts/tegra20-seaboard.dts
index 365760b..40e6fb2 100644
--- a/arch/arm/boot/dts/tegra20-seaboard.dts
+++ b/arch/arm/boot/dts/tegra20-seaboard.dts
@@ -830,6 +830,8 @@
 			regulator-max-microvolt = <5000000>;
 			enable-active-high;
 			gpio = <&gpio 24 0>; /* PD0 */
+			regulator-always-on;
+			regulator-boot-on;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/tegra20-trimslice.dts b/arch/arm/boot/dts/tegra20-trimslice.dts
index ed4b901..37c93d3 100644
--- a/arch/arm/boot/dts/tegra20-trimslice.dts
+++ b/arch/arm/boot/dts/tegra20-trimslice.dts
@@ -412,6 +412,8 @@
 			regulator-max-microvolt = <5000000>;
 			enable-active-high;
 			gpio = <&gpio 170 0>; /* PV2 */
+			regulator-always-on;
+			regulator-boot-on;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/tegra20-whistler.dts b/arch/arm/boot/dts/tegra20-whistler.dts
index ab67c94..a3d0eba 100644
--- a/arch/arm/boot/dts/tegra20-whistler.dts
+++ b/arch/arm/boot/dts/tegra20-whistler.dts
@@ -588,6 +588,8 @@
 			regulator-max-microvolt = <5000000>;
 			enable-active-high;
 			gpio = <&tca6416 0 0>; /* GPIO_PMU0 */
+			regulator-always-on;
+			regulator-boot-on;
 		};
 
 		vbus3_reg: regulator@3 {
@@ -598,6 +600,8 @@
 			regulator-max-microvolt = <5000000>;
 			enable-active-high;
 			gpio = <&tca6416 1 0>; /* GPIO_PMU1 */
+			regulator-always-on;
+			regulator-boot-on;
 		};
 	};
 
diff --git a/arch/arm/boot/dts/twl4030.dtsi b/arch/arm/boot/dts/twl4030.dtsi
index b3034da..ae6a17a 100644
--- a/arch/arm/boot/dts/twl4030.dtsi
+++ b/arch/arm/boot/dts/twl4030.dtsi
@@ -47,6 +47,12 @@
 		regulator-max-microvolt = <3150000>;
 	};
 
+	vmmc2: regulator-vmmc2 {
+		compatible = "ti,twl4030-vmmc2";
+		regulator-min-microvolt = <1850000>;
+		regulator-max-microvolt = <3150000>;
+	};
+
 	vusb1v5: regulator-vusb1v5 {
 		compatible = "ti,twl4030-vusb1v5";
 	};
diff --git a/arch/arm/boot/dts/vf610.dtsi b/arch/arm/boot/dts/vf610.dtsi
index e1eb7da..67d929c 100644
--- a/arch/arm/boot/dts/vf610.dtsi
+++ b/arch/arm/boot/dts/vf610.dtsi
@@ -442,8 +442,8 @@
 				compatible = "fsl,mvf600-fec";
 				reg = <0x400d0000 0x1000>;
 				interrupts = <0 78 0x04>;
-				clocks = <&clks VF610_CLK_ENET>,
-					<&clks VF610_CLK_ENET>,
+				clocks = <&clks VF610_CLK_ENET0>,
+					<&clks VF610_CLK_ENET0>,
 					<&clks VF610_CLK_ENET>;
 				clock-names = "ipg", "ahb", "ptp";
 				status = "disabled";
@@ -453,8 +453,8 @@
 				compatible = "fsl,mvf600-fec";
 				reg = <0x400d1000 0x1000>;
 				interrupts = <0 79 0x04>;
-				clocks = <&clks VF610_CLK_ENET>,
-					<&clks VF610_CLK_ENET>,
+				clocks = <&clks VF610_CLK_ENET1>,
+					<&clks VF610_CLK_ENET1>,
 					<&clks VF610_CLK_ENET>;
 				clock-names = "ipg", "ahb", "ptp";
 				status = "disabled";
diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index a432e6c..39ad030 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -26,7 +26,6 @@
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/edma.h>
-#include <linux/err.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
diff --git a/arch/arm/configs/da8xx_omapl_defconfig b/arch/arm/configs/da8xx_omapl_defconfig
index 7c86813..1571bea 100644
--- a/arch/arm/configs/da8xx_omapl_defconfig
+++ b/arch/arm/configs/da8xx_omapl_defconfig
@@ -102,6 +102,8 @@
 CONFIG_SND_DAVINCI_SOC=m
 # CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
+CONFIG_DMADEVICES=y
+CONFIG_TI_EDMA=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_XFS_FS=m
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index c86fd75..ab2f737 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -162,6 +162,8 @@
 CONFIG_LEDS_TRIGGER_TIMER=m
 CONFIG_LEDS_TRIGGER_HEARTBEAT=m
 CONFIG_RTC_CLASS=y
+CONFIG_DMADEVICES=y
+CONFIG_TI_EDMA=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_XFS_FS=m
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index fe0bdc3..6e572c6 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -53,6 +53,7 @@
 CONFIG_IP_PNP_DHCP=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_OMAP_OCP2SCP=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_ATA=y
 CONFIG_SATA_AHCI_PLATFORM=y
@@ -61,6 +62,7 @@
 CONFIG_NETDEVICES=y
 CONFIG_SUN4I_EMAC=y
 CONFIG_NET_CALXEDA_XGMAC=y
+CONFIG_KS8851=y
 CONFIG_SMSC911X=y
 CONFIG_STMMAC_ETH=y
 CONFIG_MDIO_SUN4I=y
@@ -89,6 +91,7 @@
 CONFIG_I2C_SIRF=y
 CONFIG_I2C_TEGRA=y
 CONFIG_SPI=y
+CONFIG_SPI_OMAP24XX=y
 CONFIG_SPI_PL022=y
 CONFIG_SPI_SIRF=y
 CONFIG_SPI_TEGRA114=y
@@ -111,11 +114,12 @@
 CONFIG_USB=y
 CONFIG_USB_XHCI_HCD=y
 CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_MXC=y
 CONFIG_USB_EHCI_TEGRA=y
 CONFIG_USB_EHCI_HCD_PLATFORM=y
 CONFIG_USB_ISP1760_HCD=y
 CONFIG_USB_STORAGE=y
+CONFIG_USB_CHIPIDEA=y
+CONFIG_USB_CHIPIDEA_HOST=y
 CONFIG_AB8500_USB=y
 CONFIG_NOP_USB_XCEIV=y
 CONFIG_OMAP_USB2=y
diff --git a/arch/arm/configs/nhk8815_defconfig b/arch/arm/configs/nhk8815_defconfig
index 35f8cf2..263ae38 100644
--- a/arch/arm/configs/nhk8815_defconfig
+++ b/arch/arm/configs/nhk8815_defconfig
@@ -1,6 +1,8 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
@@ -48,7 +50,6 @@
 CONFIG_MTD=y
 CONFIG_MTD_TESTS=m
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
 CONFIG_MTD_BLOCK=y
 CONFIG_MTD_NAND_ECC_SMC=y
 CONFIG_MTD_NAND=y
@@ -94,8 +95,10 @@
 CONFIG_I2C_NOMADIK=y
 CONFIG_DEBUG_GPIO=y
 # CONFIG_HWMON is not set
+CONFIG_REGULATOR=y
 CONFIG_MMC=y
-CONFIG_MMC_CLKGATE=y
+CONFIG_MMC_UNSAFE_RESUME=y
+# CONFIG_MMC_BLOCK_BOUNCE is not set
 CONFIG_MMC_ARMMMCI=y
 CONFIG_NEW_LEDS=y
 CONFIG_LEDS_CLASS=y
diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h
deleted file mode 100644
index 92f10cb..0000000
--- a/arch/arm/include/asm/a.out-core.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/* a.out coredump register dumper
- *
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public Licence
- * as published by the Free Software Foundation; either version
- * 2 of the Licence, or (at your option) any later version.
- */
-
-#ifndef _ASM_A_OUT_CORE_H
-#define _ASM_A_OUT_CORE_H
-
-#ifdef __KERNEL__
-
-#include <linux/user.h>
-#include <linux/elfcore.h>
-
-/*
- * fill in the user structure for an a.out core dump
- */
-static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
-{
-	struct task_struct *tsk = current;
-
-	dump->magic = CMAGIC;
-	dump->start_code = tsk->mm->start_code;
-	dump->start_stack = regs->ARM_sp & ~(PAGE_SIZE - 1);
-
-	dump->u_tsize = (tsk->mm->end_code - tsk->mm->start_code) >> PAGE_SHIFT;
-	dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	dump->u_ssize = 0;
-
-	memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg));
-
-	if (dump->start_stack < 0x04000000)
-		dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
-
-	dump->regs = *regs;
-	dump->u_fpvalid = dump_fpu (regs, &dump->u_fp);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_A_OUT_CORE_H */
diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h
index 8c25dc4..9672e97 100644
--- a/arch/arm/include/asm/cputype.h
+++ b/arch/arm/include/asm/cputype.h
@@ -89,13 +89,18 @@
 		__val;							\
 	})
 
+/*
+ * The memory clobber prevents gcc 4.5 from reordering the mrc before
+ * any is_smp() tests, which can cause undefined instruction aborts on
+ * ARM1136 r0 due to the missing extended CP15 registers.
+ */
 #define read_cpuid_ext(ext_reg)						\
 	({								\
 		unsigned int __val;					\
 		asm("mrc	p15, 0, %0, c0, " ext_reg		\
 		    : "=r" (__val)					\
 		    :							\
-		    : "cc");						\
+		    : "memory");					\
 		__val;							\
 	})
 
diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index 38050b1..56211f2 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -130,4 +130,10 @@
 extern unsigned long arch_randomize_brk(struct mm_struct *mm);
 #define arch_randomize_brk arch_randomize_brk
 
+#ifdef CONFIG_MMU
+#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
+struct linux_binprm;
+int arch_setup_additional_pages(struct linux_binprm *, int);
+#endif
+
 #endif
diff --git a/arch/arm/include/asm/mmu.h b/arch/arm/include/asm/mmu.h
index e3d5554..6f18da0 100644
--- a/arch/arm/include/asm/mmu.h
+++ b/arch/arm/include/asm/mmu.h
@@ -6,8 +6,11 @@
 typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
 	atomic64_t	id;
+#else
+	int		switch_pending;
 #endif
 	unsigned int	vmalloc_seq;
+	unsigned long	sigpage;
 } mm_context_t;
 
 #ifdef CONFIG_CPU_HAS_ASID
diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h
index b5792b7..9b32f76 100644
--- a/arch/arm/include/asm/mmu_context.h
+++ b/arch/arm/include/asm/mmu_context.h
@@ -56,7 +56,7 @@
 		 * on non-ASID CPUs, the old mm will remain valid until the
 		 * finish_arch_post_lock_switch() call.
 		 */
-		set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM);
+		mm->context.switch_pending = 1;
 	else
 		cpu_switch_mm(mm->pgd, mm);
 }
@@ -65,9 +65,21 @@
 	finish_arch_post_lock_switch
 static inline void finish_arch_post_lock_switch(void)
 {
-	if (test_and_clear_thread_flag(TIF_SWITCH_MM)) {
-		struct mm_struct *mm = current->mm;
-		cpu_switch_mm(mm->pgd, mm);
+	struct mm_struct *mm = current->mm;
+
+	if (mm && mm->context.switch_pending) {
+		/*
+		 * Preemption must be disabled during cpu_switch_mm() as we
+		 * have some stateful cache flush implementations. Check
+		 * switch_pending again in case we were preempted and the
+		 * switch to this mm was already done.
+		 */
+		preempt_disable();
+		if (mm->context.switch_pending) {
+			mm->context.switch_pending = 0;
+			cpu_switch_mm(mm->pgd, mm);
+		}
+		preempt_enable_no_resched();
 	}
 }
 
diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index 6363f3d..4355f0e 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -142,7 +142,9 @@
 #define clear_page(page)	memset((void *)(page), 0, PAGE_SIZE)
 extern void copy_page(void *to, const void *from);
 
+#ifdef CONFIG_KUSER_HELPERS
 #define __HAVE_ARCH_GATE_AREA 1
+#endif
 
 #ifdef CONFIG_ARM_LPAE
 #include <asm/pgtable-3level-types.h>
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index 06e7d50..413f387 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -54,7 +54,6 @@
 
 #define start_thread(regs,pc,sp)					\
 ({									\
-	unsigned long *stack = (unsigned long *)sp;			\
 	memset(regs->uregs, 0, sizeof(regs->uregs));			\
 	if (current->personality & ADDR_LIMIT_32BIT)			\
 		regs->ARM_cpsr = USR_MODE;				\
@@ -65,9 +64,6 @@
 	regs->ARM_cpsr |= PSR_ENDSTATE;					\
 	regs->ARM_pc = pc & ~1;		/* pc */			\
 	regs->ARM_sp = sp;		/* sp */			\
-	regs->ARM_r2 = stack[2];	/* r2 (envp) */			\
-	regs->ARM_r1 = stack[1];	/* r1 (argv) */			\
-	regs->ARM_r0 = stack[0];	/* r0 (argc) */			\
 	nommu_start_thread(regs);					\
 })
 
diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h
index 6462a72..a252c0b 100644
--- a/arch/arm/include/asm/smp_plat.h
+++ b/arch/arm/include/asm/smp_plat.h
@@ -88,4 +88,7 @@
 {
 	return 1 << mpidr_hash.bits;
 }
+
+extern int platform_can_cpu_hotplug(void);
+
 #endif
diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index f8b8965..b07c09e 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h
@@ -107,7 +107,7 @@
 		"	subs	%1, %0, %0, ror #16\n"
 		"	addeq	%0, %0, %4\n"
 		"	strexeq	%2, %0, [%3]"
-		: "=&r" (slock), "=&r" (contended), "=r" (res)
+		: "=&r" (slock), "=&r" (contended), "=&r" (res)
 		: "r" (&lock->slock), "I" (1 << TICKET_SHIFT)
 		: "cc");
 	} while (res);
@@ -168,17 +168,20 @@
 
 static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
-	unsigned long tmp;
+	unsigned long contended, res;
 
-	__asm__ __volatile__(
-"	ldrex	%0, [%1]\n"
-"	teq	%0, #0\n"
-"	strexeq	%0, %2, [%1]"
-	: "=&r" (tmp)
-	: "r" (&rw->lock), "r" (0x80000000)
-	: "cc");
+	do {
+		__asm__ __volatile__(
+		"	ldrex	%0, [%2]\n"
+		"	mov	%1, #0\n"
+		"	teq	%0, #0\n"
+		"	strexeq	%1, %3, [%2]"
+		: "=&r" (contended), "=&r" (res)
+		: "r" (&rw->lock), "r" (0x80000000)
+		: "cc");
+	} while (res);
 
-	if (tmp == 0) {
+	if (!contended) {
 		smp_mb();
 		return 1;
 	} else {
@@ -254,18 +257,26 @@
 
 static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
-	unsigned long tmp, tmp2 = 1;
+	unsigned long contended, res;
 
-	__asm__ __volatile__(
-"	ldrex	%0, [%2]\n"
-"	adds	%0, %0, #1\n"
-"	strexpl	%1, %0, [%2]\n"
-	: "=&r" (tmp), "+r" (tmp2)
-	: "r" (&rw->lock)
-	: "cc");
+	do {
+		__asm__ __volatile__(
+		"	ldrex	%0, [%2]\n"
+		"	mov	%1, #0\n"
+		"	adds	%0, %0, #1\n"
+		"	strexpl	%1, %0, [%2]"
+		: "=&r" (contended), "=&r" (res)
+		: "r" (&rw->lock)
+		: "cc");
+	} while (res);
 
-	smp_mb();
-	return tmp2 == 0;
+	/* If the lock is negative, then it is already held for write. */
+	if (contended < 0x80000000) {
+		smp_mb();
+		return 1;
+	} else {
+		return 0;
+	}
 }
 
 /* read_can_lock - would read_trylock() succeed? */
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 214d415..2b8114f 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -156,7 +156,6 @@
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
-#define TIF_SWITCH_MM		22	/* deferred switch_mm */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 46e7cfb..0baf7f0 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -43,6 +43,7 @@
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
+	unsigned long		start, end;
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
@@ -107,10 +108,12 @@
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = fullmm;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->vma = NULL;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
index fdbb9e3..f467e9b 100644
--- a/arch/arm/include/asm/tlbflush.h
+++ b/arch/arm/include/asm/tlbflush.h
@@ -443,7 +443,18 @@
 		isb();
 }
 
+#include <asm/cputype.h>
 #ifdef CONFIG_ARM_ERRATA_798181
+static inline int erratum_a15_798181(void)
+{
+	unsigned int midr = read_cpuid_id();
+
+	/* Cortex-A15 r0p0..r3p2 affected */
+	if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2)
+		return 0;
+	return 1;
+}
+
 static inline void dummy_flush_tlb_a15_erratum(void)
 {
 	/*
@@ -453,6 +464,11 @@
 	dsb();
 }
 #else
+static inline int erratum_a15_798181(void)
+{
+	return 0;
+}
+
 static inline void dummy_flush_tlb_a15_erratum(void)
 {
 }
diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h
index 50af92b..4371f45 100644
--- a/arch/arm/include/asm/virt.h
+++ b/arch/arm/include/asm/virt.h
@@ -29,6 +29,7 @@
 #define BOOT_CPU_MODE_MISMATCH	PSR_N_BIT
 
 #ifndef __ASSEMBLY__
+#include <asm/cacheflush.h>
 
 #ifdef CONFIG_ARM_VIRT_EXT
 /*
@@ -41,10 +42,21 @@
  */
 extern int __boot_cpu_mode;
 
+static inline void sync_boot_mode(void)
+{
+	/*
+	 * As secondaries write to __boot_cpu_mode with caches disabled, we
+	 * must flush the corresponding cache entries to ensure the visibility
+	 * of their writes.
+	 */
+	sync_cache_r(&__boot_cpu_mode);
+}
+
 void __hyp_set_vectors(unsigned long phys_vector_base);
 unsigned long __hyp_get_vectors(void);
 #else
 #define __boot_cpu_mode	(SVC_MODE)
+#define sync_boot_mode()
 #endif
 
 #ifndef ZIMAGE
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 47bcb2d..18d76fd 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -1,7 +1,6 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += a.out.h
 header-y += byteorder.h
 header-y += fcntl.h
 header-y += hwcap.h
diff --git a/arch/arm/include/uapi/asm/a.out.h b/arch/arm/include/uapi/asm/a.out.h
deleted file mode 100644
index 083894b..0000000
--- a/arch/arm/include/uapi/asm/a.out.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __ARM_A_OUT_H__
-#define __ARM_A_OUT_H__
-
-#include <linux/personality.h>
-#include <linux/types.h>
-
-struct exec
-{
-  __u32 a_info;		/* Use macros N_MAGIC, etc for access */
-  __u32 a_text;		/* length of text, in bytes */
-  __u32 a_data;		/* length of data, in bytes */
-  __u32 a_bss;		/* length of uninitialized data area for file, in bytes */
-  __u32 a_syms;		/* length of symbol table data in file, in bytes */
-  __u32 a_entry;	/* start address */
-  __u32 a_trsize;	/* length of relocation info for text, in bytes */
-  __u32 a_drsize;	/* length of relocation info for data, in bytes */
-};
-
-/*
- * This is always the same
- */
-#define N_TXTADDR(a)	(0x00008000)
-
-#define N_TRSIZE(a)	((a).a_trsize)
-#define N_DRSIZE(a)	((a).a_drsize)
-#define N_SYMSIZE(a)	((a).a_syms)
-
-#define M_ARM 103
-
-#ifndef LIBRARY_START_TEXT
-#define LIBRARY_START_TEXT	(0x00c00000)
-#endif
-
-#endif /* __A_OUT_GNU_H__ */
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index a39cfc2a1..9cbe70c 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -357,7 +357,8 @@
 	.endm
 
 	.macro	kuser_cmpxchg_check
-#if !defined(CONFIG_CPU_32v6K) && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
+#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) && \
+    !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG)
 #ifndef CONFIG_MMU
 #warning "NPTL on non MMU needs fixing"
 #else
@@ -742,6 +743,18 @@
 #endif
 	.endm
 
+	.macro	kuser_pad, sym, size
+	.if	(. - \sym) & 3
+	.rept	4 - (. - \sym) & 3
+	.byte	0
+	.endr
+	.endif
+	.rept	(\size - (. - \sym)) / 4
+	.word	0xe7fddef1
+	.endr
+	.endm
+
+#ifdef CONFIG_KUSER_HELPERS
 	.align	5
 	.globl	__kuser_helper_start
 __kuser_helper_start:
@@ -832,18 +845,13 @@
 #error "incoherent kernel configuration"
 #endif
 
-	/* pad to next slot */
-	.rept	(16 - (. - __kuser_cmpxchg64)/4)
-	.word	0
-	.endr
-
-	.align	5
+	kuser_pad __kuser_cmpxchg64, 64
 
 __kuser_memory_barrier:				@ 0xffff0fa0
 	smp_dmb	arm
 	usr_ret	lr
 
-	.align	5
+	kuser_pad __kuser_memory_barrier, 32
 
 __kuser_cmpxchg:				@ 0xffff0fc0
 
@@ -916,13 +924,14 @@
 
 #endif
 
-	.align	5
+	kuser_pad __kuser_cmpxchg, 32
 
 __kuser_get_tls:				@ 0xffff0fe0
 	ldr	r0, [pc, #(16 - 8)]	@ read TLS, set in kuser_get_tls_init
 	usr_ret	lr
 	mrc	p15, 0, r0, c13, c0, 3	@ 0xffff0fe8 hardware TLS code
-	.rep	4
+	kuser_pad __kuser_get_tls, 16
+	.rep	3
 	.word	0			@ 0xffff0ff0 software TLS value, then
 	.endr				@ pad up to __kuser_helper_version
 
@@ -932,14 +941,16 @@
 	.globl	__kuser_helper_end
 __kuser_helper_end:
 
+#endif
+
  THUMB(	.thumb	)
 
 /*
  * Vector stubs.
  *
- * This code is copied to 0xffff0200 so we can use branches in the
- * vectors, rather than ldr's.  Note that this code must not
- * exceed 0x300 bytes.
+ * This code is copied to 0xffff1000 so we can use branches in the
+ * vectors, rather than ldr's.  Note that this code must not exceed
+ * a page size.
  *
  * Common stub entry macro:
  *   Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC
@@ -986,8 +997,17 @@
 1:
 	.endm
 
-	.globl	__stubs_start
+	.section .stubs, "ax", %progbits
 __stubs_start:
+	@ This must be the first word
+	.word	vector_swi
+
+vector_rst:
+ ARM(	swi	SYS_ERROR0	)
+ THUMB(	svc	#0		)
+ THUMB(	nop			)
+	b	vector_und
+
 /*
  * Interrupt dispatcher
  */
@@ -1082,6 +1102,16 @@
 	.align	5
 
 /*=============================================================================
+ * Address exception handler
+ *-----------------------------------------------------------------------------
+ * These aren't too critical.
+ * (they're not supposed to happen, and won't happen in 32-bit data mode).
+ */
+
+vector_addrexcptn:
+	b	vector_addrexcptn
+
+/*=============================================================================
  * Undefined FIQs
  *-----------------------------------------------------------------------------
  * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC
@@ -1094,45 +1124,19 @@
 vector_fiq:
 	subs	pc, lr, #4
 
-/*=============================================================================
- * Address exception handler
- *-----------------------------------------------------------------------------
- * These aren't too critical.
- * (they're not supposed to happen, and won't happen in 32-bit data mode).
- */
+	.globl	vector_fiq_offset
+	.equ	vector_fiq_offset, vector_fiq
 
-vector_addrexcptn:
-	b	vector_addrexcptn
-
-/*
- * We group all the following data together to optimise
- * for CPUs with separate I & D caches.
- */
-	.align	5
-
-.LCvswi:
-	.word	vector_swi
-
-	.globl	__stubs_end
-__stubs_end:
-
-	.equ	stubs_offset, __vectors_start + 0x200 - __stubs_start
-
-	.globl	__vectors_start
+	.section .vectors, "ax", %progbits
 __vectors_start:
- ARM(	swi	SYS_ERROR0	)
- THUMB(	svc	#0		)
- THUMB(	nop			)
-	W(b)	vector_und + stubs_offset
-	W(ldr)	pc, .LCvswi + stubs_offset
-	W(b)	vector_pabt + stubs_offset
-	W(b)	vector_dabt + stubs_offset
-	W(b)	vector_addrexcptn + stubs_offset
-	W(b)	vector_irq + stubs_offset
-	W(b)	vector_fiq + stubs_offset
-
-	.globl	__vectors_end
-__vectors_end:
+	W(b)	vector_rst
+	W(b)	vector_und
+	W(ldr)	pc, __vectors_start + 0x1000
+	W(b)	vector_pabt
+	W(b)	vector_dabt
+	W(b)	vector_addrexcptn
+	W(b)	vector_irq
+	W(b)	vector_fiq
 
 	.data
 
diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S
index e00621f..52b2643 100644
--- a/arch/arm/kernel/entry-v7m.S
+++ b/arch/arm/kernel/entry-v7m.S
@@ -49,7 +49,7 @@
 	mov	r1, sp
 	stmdb	sp!, {lr}
 	@ routine called with r0 = irq number, r1 = struct pt_regs *
-	bl	nvic_do_IRQ
+	bl	nvic_handle_irq
 
 	pop	{lr}
 	@
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index 2adda11..918875d 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -47,6 +47,11 @@
 #include <asm/irq.h>
 #include <asm/traps.h>
 
+#define FIQ_OFFSET ({					\
+		extern void *vector_fiq_offset;		\
+		(unsigned)&vector_fiq_offset;		\
+	})
+
 static unsigned long no_fiq_insn;
 
 /* Default reacquire function
@@ -79,14 +84,14 @@
 
 void set_fiq_handler(void *start, unsigned int length)
 {
-#if defined(CONFIG_CPU_USE_DOMAINS)
-	memcpy((void *)0xffff001c, start, length);
-#else
-	memcpy(vectors_page + 0x1c, start, length);
-#endif
-	flush_icache_range(0xffff001c, 0xffff001c + length);
-	if (!vectors_high())
-		flush_icache_range(0x1c, 0x1c + length);
+	void *base = vectors_page;
+	unsigned offset = FIQ_OFFSET;
+
+	memcpy(base + offset, start, length);
+	if (!cache_is_vipt_nonaliasing())
+		flush_icache_range((unsigned long)base + offset, offset +
+				   length);
+	flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length);
 }
 
 int claim_fiq(struct fiq_handler *f)
@@ -144,6 +149,7 @@
 
 void __init init_FIQ(int start)
 {
-	no_fiq_insn = *(unsigned long *)0xffff001c;
+	unsigned offset = FIQ_OFFSET;
+	no_fiq_insn = *(unsigned long *)(0xffff0000 + offset);
 	fiq_start = start;
 }
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index b361de1..14235ba 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -87,6 +87,7 @@
 ENDPROC(stext)
 
 #ifdef CONFIG_SMP
+	.text
 ENTRY(secondary_startup)
 	/*
 	 * Common entry point for secondary CPUs.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 9cf6063..2c7cc1e 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -343,6 +343,7 @@
 	.long	__turn_mmu_on_end
 
 #if defined(CONFIG_SMP)
+	.text
 ENTRY(secondary_startup)
 	/*
 	 * Common entry point for secondary CPUs.
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 4910232..797b1a6 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -56,8 +56,8 @@
 	ldr	\reg3, [\reg2]
 	ldr	\reg1, [\reg2, \reg3]
 	cmp	\mode, \reg1		@ matches primary CPU boot mode?
-	orrne	r7, r7, #BOOT_CPU_MODE_MISMATCH
-	strne	r7, [r5, r6]		@ record what happened and give up
+	orrne	\reg1, \reg1, #BOOT_CPU_MODE_MISMATCH
+	strne	\reg1, [\reg2, \reg3]	@ record what happened and give up
 	.endm
 
 #else	/* ZIMAGE */
diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c
index 4fb074c..57221e3 100644
--- a/arch/arm/kernel/machine_kexec.c
+++ b/arch/arm/kernel/machine_kexec.c
@@ -15,6 +15,7 @@
 #include <asm/mmu_context.h>
 #include <asm/cacheflush.h>
 #include <asm/mach-types.h>
+#include <asm/smp_plat.h>
 #include <asm/system_misc.h>
 
 extern const unsigned char relocate_new_kernel[];
@@ -39,6 +40,14 @@
 	int i, err;
 
 	/*
+	 * Validate that if the current HW supports SMP, then the SW supports
+	 * and implements CPU hotplug for the current HW. If not, we won't be
+	 * able to kexec reliably, so fail the prepare operation.
+	 */
+	if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug())
+		return -EINVAL;
+
+	/*
 	 * No segment at default ATAGs address. try to locate
 	 * a dtb using magic.
 	 */
@@ -73,6 +82,7 @@
 	crash_save_cpu(&regs, smp_processor_id());
 	flush_cache_all();
 
+	set_cpu_online(smp_processor_id(), false);
 	atomic_dec(&waiting_for_crash_ipi);
 	while (1)
 		cpu_relax();
@@ -134,10 +144,13 @@
 	unsigned long reboot_code_buffer_phys;
 	void *reboot_code_buffer;
 
-	if (num_online_cpus() > 1) {
-		pr_err("kexec: error: multiple CPUs still online\n");
-		return;
-	}
+	/*
+	 * This can only happen if machine_shutdown() failed to disable some
+	 * CPU, and that can only happen if the checks in
+	 * machine_kexec_prepare() were not correct. If this fails, we can't
+	 * reliably kexec anyway, so BUG_ON is appropriate.
+	 */
+	BUG_ON(num_online_cpus() > 1);
 
 	page_list = image->head & PAGE_MASK;
 
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index d9f5cd4..e186ee1 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -53,7 +53,12 @@
 static int
 armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*event_map)[config];
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	mapping = (*event_map)[config];
 	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
@@ -253,6 +258,9 @@
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	struct pmu *leader_pmu = event->group_leader->pmu;
 
+	if (is_software_event(event))
+		return 1;
+
 	if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF)
 		return 1;
 
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index d3ca4f6..94f6b05 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -197,6 +197,7 @@
  */
 void machine_halt(void)
 {
+	local_irq_disable();
 	smp_send_stop();
 
 	local_irq_disable();
@@ -211,6 +212,7 @@
  */
 void machine_power_off(void)
 {
+	local_irq_disable();
 	smp_send_stop();
 
 	if (pm_power_off)
@@ -230,6 +232,7 @@
  */
 void machine_restart(char *cmd)
 {
+	local_irq_disable();
 	smp_send_stop();
 
 	arm_pm_restart(reboot_mode, cmd);
@@ -426,10 +429,11 @@
 }
 
 #ifdef CONFIG_MMU
+#ifdef CONFIG_KUSER_HELPERS
 /*
  * The vectors page is always readable from user space for the
- * atomic helpers and the signal restart code. Insert it into the
- * gate_vma so that it is visible through ptrace and /proc/<pid>/mem.
+ * atomic helpers. Insert it into the gate_vma so that it is visible
+ * through ptrace and /proc/<pid>/mem.
  */
 static struct vm_area_struct gate_vma = {
 	.vm_start	= 0xffff0000,
@@ -458,9 +462,48 @@
 {
 	return in_gate_area(NULL, addr);
 }
+#define is_gate_vma(vma)	((vma) == &gate_vma)
+#else
+#define is_gate_vma(vma)	0
+#endif
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
-	return (vma == &gate_vma) ? "[vectors]" : NULL;
+	return is_gate_vma(vma) ? "[vectors]" :
+		(vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ?
+		 "[sigpage]" : NULL;
+}
+
+static struct page *signal_page;
+extern struct page *get_signal_page(void);
+
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr;
+	int ret;
+
+	if (!signal_page)
+		signal_page = get_signal_page();
+	if (!signal_page)
+		return -ENOMEM;
+
+	down_write(&mm->mmap_sem);
+	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+	if (IS_ERR_VALUE(addr)) {
+		ret = addr;
+		goto up_fail;
+	}
+
+	ret = install_special_mapping(mm, addr, PAGE_SIZE,
+		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
+		&signal_page);
+
+	if (ret == 0)
+		mm->context.sigpage = addr;
+
+ up_fail:
+	up_write(&mm->mmap_sem);
+	return ret;
 }
 #endif
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 63af9a7..afc2489 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -836,6 +836,8 @@
 void __init hyp_mode_check(void)
 {
 #ifdef CONFIG_ARM_VIRT_EXT
+	sync_boot_mode();
+
 	if (is_hyp_mode_available()) {
 		pr_info("CPU: All CPU(s) started in HYP mode.\n");
 		pr_info("CPU: Virtualization extensions available.\n");
@@ -971,6 +973,7 @@
 	"vfpv4",
 	"idiva",
 	"idivt",
+	"vfpd32",
 	"lpae",
 	NULL
 };
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 1c16c35..ab33042 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -8,6 +8,7 @@
  * published by the Free Software Foundation.
  */
 #include <linux/errno.h>
+#include <linux/random.h>
 #include <linux/signal.h>
 #include <linux/personality.h>
 #include <linux/uaccess.h>
@@ -15,12 +16,11 @@
 
 #include <asm/elf.h>
 #include <asm/cacheflush.h>
+#include <asm/traps.h>
 #include <asm/ucontext.h>
 #include <asm/unistd.h>
 #include <asm/vfp.h>
 
-#include "signal.h"
-
 /*
  * For ARM syscalls, we encode the syscall number into the instruction.
  */
@@ -40,11 +40,13 @@
 #define SWI_THUMB_SIGRETURN	(0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE))
 #define SWI_THUMB_RT_SIGRETURN	(0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE))
 
-const unsigned long sigreturn_codes[7] = {
+static const unsigned long sigreturn_codes[7] = {
 	MOV_R7_NR_SIGRETURN,    SWI_SYS_SIGRETURN,    SWI_THUMB_SIGRETURN,
 	MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN,
 };
 
+static unsigned long signal_return_offset;
+
 #ifdef CONFIG_CRUNCH
 static int preserve_crunch_context(struct crunch_sigframe __user *frame)
 {
@@ -400,14 +402,20 @@
 		    __put_user(sigreturn_codes[idx+1], rc+1))
 			return 1;
 
-		if ((cpsr & MODE32_BIT) && !IS_ENABLED(CONFIG_ARM_MPU)) {
+#ifdef CONFIG_MMU
+		if (cpsr & MODE32_BIT) {
+			struct mm_struct *mm = current->mm;
+
 			/*
-			 * 32-bit code can use the new high-page
-			 * signal return code support except when the MPU has
-			 * protected the vectors page from PL0
+			 * 32-bit code can use the signal return page
+			 * except when the MPU has protected the vectors
+			 * page from PL0
 			 */
-			retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb;
-		} else {
+			retcode = mm->context.sigpage + signal_return_offset +
+				  (idx << 2) + thumb;
+		} else
+#endif
+		{
 			/*
 			 * Ensure that the instruction cache sees
 			 * the return code written onto the stack.
@@ -608,3 +616,33 @@
 	} while (thread_flags & _TIF_WORK_MASK);
 	return 0;
 }
+
+struct page *get_signal_page(void)
+{
+	unsigned long ptr;
+	unsigned offset;
+	struct page *page;
+	void *addr;
+
+	page = alloc_pages(GFP_KERNEL, 0);
+
+	if (!page)
+		return NULL;
+
+	addr = page_address(page);
+
+	/* Give the signal return code some randomness */
+	offset = 0x200 + (get_random_int() & 0x7fc);
+	signal_return_offset = offset;
+
+	/*
+	 * Copy signal return handlers into the vector page, and
+	 * set sigreturn to be a pointer to these.
+	 */
+	memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes));
+
+	ptr = (unsigned long)addr + offset;
+	flush_icache_range(ptr, ptr + sizeof(sigreturn_codes));
+
+	return page;
+}
diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h
deleted file mode 100644
index 5ff067b7..0000000
--- a/arch/arm/kernel/signal.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- *  linux/arch/arm/kernel/signal.h
- *
- *  Copyright (C) 2005-2009 Russell King.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define KERN_SIGRETURN_CODE	(CONFIG_VECTORS_BASE + 0x00000500)
-
-extern const unsigned long sigreturn_codes[7];
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index c2b4f8f..2dc19349e 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -145,6 +145,16 @@
 	return -ENOSYS;
 }
 
+int platform_can_cpu_hotplug(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	if (smp_ops.cpu_kill)
+		return 1;
+#endif
+
+	return 0;
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 static void percpu_timer_stop(void);
 
diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c
index a98b62d..c2edfff 100644
--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
@@ -70,23 +70,6 @@
 	local_flush_bp_all();
 }
 
-#ifdef CONFIG_ARM_ERRATA_798181
-static int erratum_a15_798181(void)
-{
-	unsigned int midr = read_cpuid_id();
-
-	/* Cortex-A15 r0p0..r3p2 affected */
-	if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2)
-		return 0;
-	return 1;
-}
-#else
-static int erratum_a15_798181(void)
-{
-	return 0;
-}
-#endif
-
 static void ipi_flush_tlb_a15_erratum(void *arg)
 {
 	dmb();
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index cab094c..ab517fc 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -35,8 +35,6 @@
 #include <asm/tls.h>
 #include <asm/system_misc.h>
 
-#include "signal.h"
-
 static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
 
 void *vectors_page;
@@ -800,15 +798,26 @@
 	return;
 }
 
-static void __init kuser_get_tls_init(unsigned long vectors)
+#ifdef CONFIG_KUSER_HELPERS
+static void __init kuser_init(void *vectors)
 {
+	extern char __kuser_helper_start[], __kuser_helper_end[];
+	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+
+	memcpy(vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
+
 	/*
 	 * vectors + 0xfe0 = __kuser_get_tls
 	 * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8
 	 */
 	if (tls_emu || has_tls_reg)
-		memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4);
+		memcpy(vectors + 0xfe0, vectors + 0xfe8, 4);
 }
+#else
+static void __init kuser_init(void *vectors)
+{
+}
+#endif
 
 void __init early_trap_init(void *vectors_base)
 {
@@ -816,33 +825,30 @@
 	unsigned long vectors = (unsigned long)vectors_base;
 	extern char __stubs_start[], __stubs_end[];
 	extern char __vectors_start[], __vectors_end[];
-	extern char __kuser_helper_start[], __kuser_helper_end[];
-	int kuser_sz = __kuser_helper_end - __kuser_helper_start;
+	unsigned i;
 
 	vectors_page = vectors_base;
 
 	/*
+	 * Poison the vectors page with an undefined instruction.  This
+	 * instruction is chosen to be undefined for both ARM and Thumb
+	 * ISAs.  The Thumb version is an undefined instruction with a
+	 * branch back to the undefined instruction.
+	 */
+	for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
+		((u32 *)vectors_base)[i] = 0xe7fddef1;
+
+	/*
 	 * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
 	 * into the vector page, mapped at 0xffff0000, and ensure these
 	 * are visible to the instruction stream.
 	 */
 	memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start);
-	memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start);
-	memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz);
+	memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start);
 
-	/*
-	 * Do processor specific fixups for the kuser helpers
-	 */
-	kuser_get_tls_init(vectors);
+	kuser_init(vectors_base);
 
-	/*
-	 * Copy signal return handlers into the vector page, and
-	 * set sigreturn to be a pointer to these.
-	 */
-	memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE),
-	       sigreturn_codes, sizeof(sigreturn_codes));
-
-	flush_icache_range(vectors, vectors + PAGE_SIZE);
+	flush_icache_range(vectors, vectors + PAGE_SIZE * 2);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
 #else /* ifndef CONFIG_CPU_V7M */
 	/*
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index fa25e4e..7bcee5c 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -148,6 +148,23 @@
 	. = ALIGN(PAGE_SIZE);
 	__init_begin = .;
 #endif
+	/*
+	 * The vectors and stubs are relocatable code, and the
+	 * only thing that matters is their relative offsets
+	 */
+	__vectors_start = .;
+	.vectors 0 : AT(__vectors_start) {
+		*(.vectors)
+	}
+	. = __vectors_start + SIZEOF(.vectors);
+	__vectors_end = .;
+
+	__stubs_start = .;
+	.stubs 0x1000 : AT(__stubs_start) {
+		*(.stubs)
+	}
+	. = __stubs_start + SIZEOF(.stubs);
+	__stubs_end = .;
 
 	INIT_TEXT_SECTION(8)
 	.exit.text : {
diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c
index 4a51990..db9cf69 100644
--- a/arch/arm/kvm/coproc.c
+++ b/arch/arm/kvm/coproc.c
@@ -146,7 +146,11 @@
 #define access_pmintenclr pm_fake
 
 /* Architected CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg cp15_regs[] = {
 	/* CSSELR: swapped by interrupt.S. */
@@ -154,8 +158,8 @@
 			NULL, reset_unknown, c0_CSSELR },
 
 	/* TTBR0/TTBR1: swapped by interrupt.S. */
-	{ CRm( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
-	{ CRm( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
+	{ CRm64( 2), Op1( 0), is64, NULL, reset_unknown64, c2_TTBR0 },
+	{ CRm64( 2), Op1( 1), is64, NULL, reset_unknown64, c2_TTBR1 },
 
 	/* TTBCR: swapped by interrupt.S. */
 	{ CRn( 2), CRm( 0), Op1( 0), Op2( 2), is32,
@@ -182,7 +186,7 @@
 			NULL, reset_unknown, c6_IFAR },
 
 	/* PAR swapped by interrupt.S */
-	{ CRn( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
+	{ CRm64( 7), Op1( 0), is64, NULL, reset_unknown64, c7_PAR },
 
 	/*
 	 * DC{C,I,CI}SW operations:
@@ -399,12 +403,13 @@
 			      | KVM_REG_ARM_OPC1_MASK))
 			return false;
 		params->is_64bit = true;
-		params->CRm = ((id & KVM_REG_ARM_CRM_MASK)
+		/* CRm to CRn: see cp15_to_index for details */
+		params->CRn = ((id & KVM_REG_ARM_CRM_MASK)
 			       >> KVM_REG_ARM_CRM_SHIFT);
 		params->Op1 = ((id & KVM_REG_ARM_OPC1_MASK)
 			       >> KVM_REG_ARM_OPC1_SHIFT);
 		params->Op2 = 0;
-		params->CRn = 0;
+		params->CRm = 0;
 		return true;
 	default:
 		return false;
@@ -898,7 +903,14 @@
 	if (reg->is_64) {
 		val |= KVM_REG_SIZE_U64;
 		val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
-		val |= (reg->CRm << KVM_REG_ARM_CRM_SHIFT);
+		/*
+		 * CRn always denotes the primary coproc. reg. nr. for the
+		 * in-kernel representation, but the user space API uses the
+		 * CRm for the encoding, because it is modelled after the
+		 * MRRC/MCRR instructions: see the ARM ARM rev. c page
+		 * B3-1445
+		 */
+		val |= (reg->CRn << KVM_REG_ARM_CRM_SHIFT);
 	} else {
 		val |= KVM_REG_SIZE_U32;
 		val |= (reg->Op1 << KVM_REG_ARM_OPC1_SHIFT);
diff --git a/arch/arm/kvm/coproc.h b/arch/arm/kvm/coproc.h
index b7301d3..0461d5c 100644
--- a/arch/arm/kvm/coproc.h
+++ b/arch/arm/kvm/coproc.h
@@ -135,6 +135,8 @@
 		return -1;
 	if (i1->CRn != i2->CRn)
 		return i1->CRn - i2->CRn;
+	if (i1->is_64 != i2->is_64)
+		return i2->is_64 - i1->is_64;
 	if (i1->CRm != i2->CRm)
 		return i1->CRm - i2->CRm;
 	if (i1->Op1 != i2->Op1)
@@ -145,6 +147,7 @@
 
 #define CRn(_x)		.CRn = _x
 #define CRm(_x) 	.CRm = _x
+#define CRm64(_x)       .CRn = _x, .CRm = 0
 #define Op1(_x) 	.Op1 = _x
 #define Op2(_x) 	.Op2 = _x
 #define is64		.is_64 = true
diff --git a/arch/arm/kvm/coproc_a15.c b/arch/arm/kvm/coproc_a15.c
index 685063a..cf93472 100644
--- a/arch/arm/kvm/coproc_a15.c
+++ b/arch/arm/kvm/coproc_a15.c
@@ -114,7 +114,11 @@
 
 /*
  * A15-specific CP15 registers.
- * Important: Must be sorted ascending by CRn, CRM, Op1, Op2
+ * CRn denotes the primary register number, but is copied to the CRm in the
+ * user space API for 64-bit register access in line with the terminology used
+ * in the ARM ARM.
+ * Important: Must be sorted ascending by CRn, CRM, Op1, Op2 and with 64-bit
+ *            registers preceding 32-bit ones.
  */
 static const struct coproc_reg a15_regs[] = {
 	/* MPIDR: we use VMPIDR for guest access. */
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index b8e06b7..0c25d94 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -63,7 +63,8 @@
 static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		      struct kvm_exit_mmio *mmio)
 {
-	unsigned long rt, len;
+	unsigned long rt;
+	int len;
 	bool is_write, sign_extend;
 
 	if (kvm_vcpu_dabt_isextabt(vcpu)) {
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index ca6bea4..0988d9e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -85,6 +85,12 @@
 	return p;
 }
 
+static bool page_empty(void *ptr)
+{
+	struct page *ptr_page = virt_to_page(ptr);
+	return page_count(ptr_page) == 1;
+}
+
 static void clear_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr)
 {
 	pmd_t *pmd_table = pmd_offset(pud, 0);
@@ -103,12 +109,6 @@
 	put_page(virt_to_page(pmd));
 }
 
-static bool pmd_empty(pmd_t *pmd)
-{
-	struct page *pmd_page = virt_to_page(pmd);
-	return page_count(pmd_page) == 1;
-}
-
 static void clear_pte_entry(struct kvm *kvm, pte_t *pte, phys_addr_t addr)
 {
 	if (pte_present(*pte)) {
@@ -118,12 +118,6 @@
 	}
 }
 
-static bool pte_empty(pte_t *pte)
-{
-	struct page *pte_page = virt_to_page(pte);
-	return page_count(pte_page) == 1;
-}
-
 static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
 			unsigned long long start, u64 size)
 {
@@ -132,37 +126,37 @@
 	pmd_t *pmd;
 	pte_t *pte;
 	unsigned long long addr = start, end = start + size;
-	u64 range;
+	u64 next;
 
 	while (addr < end) {
 		pgd = pgdp + pgd_index(addr);
 		pud = pud_offset(pgd, addr);
 		if (pud_none(*pud)) {
-			addr += PUD_SIZE;
+			addr = pud_addr_end(addr, end);
 			continue;
 		}
 
 		pmd = pmd_offset(pud, addr);
 		if (pmd_none(*pmd)) {
-			addr += PMD_SIZE;
+			addr = pmd_addr_end(addr, end);
 			continue;
 		}
 
 		pte = pte_offset_kernel(pmd, addr);
 		clear_pte_entry(kvm, pte, addr);
-		range = PAGE_SIZE;
+		next = addr + PAGE_SIZE;
 
 		/* If we emptied the pte, walk back up the ladder */
-		if (pte_empty(pte)) {
+		if (page_empty(pte)) {
 			clear_pmd_entry(kvm, pmd, addr);
-			range = PMD_SIZE;
-			if (pmd_empty(pmd)) {
+			next = pmd_addr_end(addr, end);
+			if (page_empty(pmd) && !page_empty(pud)) {
 				clear_pud_entry(kvm, pud, addr);
-				range = PUD_SIZE;
+				next = pud_addr_end(addr, end);
 			}
 		}
 
-		addr += range;
+		addr = next;
 	}
 }
 
diff --git a/arch/arm/mach-at91/at91sam9x5.c b/arch/arm/mach-at91/at91sam9x5.c
index 2abee66..916e5a1 100644
--- a/arch/arm/mach-at91/at91sam9x5.c
+++ b/arch/arm/mach-at91/at91sam9x5.c
@@ -227,6 +227,8 @@
 	CLKDEV_CON_DEV_ID("usart", "f8020000.serial", &usart1_clk),
 	CLKDEV_CON_DEV_ID("usart", "f8024000.serial", &usart2_clk),
 	CLKDEV_CON_DEV_ID("usart", "f8028000.serial", &usart3_clk),
+	CLKDEV_CON_DEV_ID("usart", "f8040000.serial", &uart0_clk),
+	CLKDEV_CON_DEV_ID("usart", "f8044000.serial", &uart1_clk),
 	CLKDEV_CON_DEV_ID("t0_clk", "f8008000.timer", &tcb0_clk),
 	CLKDEV_CON_DEV_ID("t0_clk", "f800c000.timer", &tcb0_clk),
 	CLKDEV_CON_DEV_ID("mci_clk", "f0008000.mmc", &mmc0_clk),
diff --git a/arch/arm/mach-davinci/board-dm355-leopard.c b/arch/arm/mach-davinci/board-dm355-leopard.c
index dff4ddc..139e42d 100644
--- a/arch/arm/mach-davinci/board-dm355-leopard.c
+++ b/arch/arm/mach-davinci/board-dm355-leopard.c
@@ -75,6 +75,7 @@
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
 	.ecc_mode		= NAND_ECC_HW_SYNDROME,
+	.ecc_bits		= 4,
 	.bbt_options		= NAND_BBT_USE_FLASH,
 };
 
diff --git a/arch/arm/mach-davinci/board-dm365-evm.c b/arch/arm/mach-davinci/board-dm365-evm.c
index afbc439..4cdb61c 100644
--- a/arch/arm/mach-davinci/board-dm365-evm.c
+++ b/arch/arm/mach-davinci/board-dm365-evm.c
@@ -505,7 +505,7 @@
 /*
  * Amplifiers on the board
  */
-struct ths7303_platform_data ths7303_pdata = {
+static struct ths7303_platform_data ths7303_pdata = {
 	.ch_1 = 3,
 	.ch_2 = 3,
 	.ch_3 = 3,
diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c
index a33686a..fa4bfaf 100644
--- a/arch/arm/mach-davinci/board-dm644x-evm.c
+++ b/arch/arm/mach-davinci/board-dm644x-evm.c
@@ -153,6 +153,7 @@
 	.parts		= davinci_evm_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(davinci_evm_nandflash_partition),
 	.ecc_mode	= NAND_ECC_HW,
+	.ecc_bits	= 1,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 	.timing		= &davinci_evm_nandflash_timing,
 };
diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c
index fbb8e5a..0c005e8 100644
--- a/arch/arm/mach-davinci/board-dm646x-evm.c
+++ b/arch/arm/mach-davinci/board-dm646x-evm.c
@@ -90,6 +90,7 @@
 	.parts			= davinci_nand_partitions,
 	.nr_parts		= ARRAY_SIZE(davinci_nand_partitions),
 	.ecc_mode		= NAND_ECC_HW,
+	.ecc_bits		= 1,
 	.options		= 0,
 };
 
diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 2bc112a..808233b 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -88,6 +88,7 @@
 	.parts		= davinci_ntosd2_nandflash_partition,
 	.nr_parts	= ARRAY_SIZE(davinci_ntosd2_nandflash_partition),
 	.ecc_mode	= NAND_ECC_HW,
+	.ecc_bits	= 1,
 	.bbt_options	= NAND_BBT_USE_FLASH,
 };
 
diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c
index 42ef53f..86100d1 100644
--- a/arch/arm/mach-davinci/dm355.c
+++ b/arch/arm/mach-davinci/dm355.c
@@ -860,7 +860,7 @@
 	},
 };
 
-struct venc_platform_data dm355_venc_pdata = {
+static struct venc_platform_data dm355_venc_pdata = {
 	.setup_pinmux	= dm355_vpbe_setup_pinmux,
 	.setup_clock	= dm355_venc_setup_clock,
 };
diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c
index fa7af5e..dad2802 100644
--- a/arch/arm/mach-davinci/dm365.c
+++ b/arch/arm/mach-davinci/dm365.c
@@ -1349,7 +1349,7 @@
 	},
 };
 
-struct venc_platform_data dm365_venc_pdata = {
+static struct venc_platform_data dm365_venc_pdata = {
 	.setup_pinmux	= dm365_vpbe_setup_pinmux,
 	.setup_clock	= dm365_venc_setup_clock,
 };
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 855d4a7..5952e68 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -92,6 +92,7 @@
 	bool "SAMSUNG EXYNOS5440"
 	default y
 	depends on ARCH_EXYNOS5
+	select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE
 	select ARCH_HAS_OPP
 	select HAVE_ARM_ARCH_TIMER
 	select AUTO_ZRELADDR
diff --git a/arch/arm/mach-exynos/Makefile b/arch/arm/mach-exynos/Makefile
index e970a7a..5369615 100644
--- a/arch/arm/mach-exynos/Makefile
+++ b/arch/arm/mach-exynos/Makefile
@@ -14,7 +14,7 @@
 
 obj-$(CONFIG_ARCH_EXYNOS)	+= common.o
 
-obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_S5P_PM)		+= pm.o
 obj-$(CONFIG_PM_GENERIC_DOMAINS) += pm_domains.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 
diff --git a/arch/arm/mach-exynos/common.c b/arch/arm/mach-exynos/common.c
index 164685b..ba95e5d 100644
--- a/arch/arm/mach-exynos/common.c
+++ b/arch/arm/mach-exynos/common.c
@@ -58,7 +58,6 @@
 
 static void exynos4_map_io(void);
 static void exynos5_map_io(void);
-static void exynos5440_map_io(void);
 static int exynos_init(void);
 
 static struct cpu_table cpu_ids[] __initdata = {
@@ -95,7 +94,6 @@
 	}, {
 		.idcode		= EXYNOS5440_SOC_ID,
 		.idmask		= EXYNOS5_SOC_MASK,
-		.map_io		= exynos5440_map_io,
 		.init		= exynos_init,
 		.name		= name_exynos5440,
 	},
@@ -150,11 +148,6 @@
 		.length		= SZ_64K,
 		.type		= MT_DEVICE,
 	}, {
-		.virtual	= (unsigned long)S3C_VA_UART,
-		.pfn		= __phys_to_pfn(EXYNOS4_PA_UART),
-		.length		= SZ_512K,
-		.type		= MT_DEVICE,
-	}, {
 		.virtual	= (unsigned long)S5P_VA_CMU,
 		.pfn		= __phys_to_pfn(EXYNOS4_PA_CMU),
 		.length		= SZ_128K,
@@ -268,20 +261,6 @@
 		.pfn		= __phys_to_pfn(EXYNOS5_PA_PMU),
 		.length		= SZ_64K,
 		.type		= MT_DEVICE,
-	}, {
-		.virtual	= (unsigned long)S3C_VA_UART,
-		.pfn		= __phys_to_pfn(EXYNOS5_PA_UART),
-		.length		= SZ_512K,
-		.type		= MT_DEVICE,
-	},
-};
-
-static struct map_desc exynos5440_iodesc0[] __initdata = {
-	{
-		.virtual	= (unsigned long)S3C_VA_UART,
-		.pfn		= __phys_to_pfn(EXYNOS5440_PA_UART0),
-		.length		= SZ_512K,
-		.type		= MT_DEVICE,
 	},
 };
 
@@ -388,11 +367,6 @@
 		iotable_init(exynos5250_iodesc, ARRAY_SIZE(exynos5250_iodesc));
 }
 
-static void __init exynos5440_map_io(void)
-{
-	iotable_init(exynos5440_iodesc0, ARRAY_SIZE(exynos5440_iodesc0));
-}
-
 void __init exynos_init_time(void)
 {
 	of_clk_init(NULL);
diff --git a/arch/arm/mach-exynos/common.h b/arch/arm/mach-exynos/common.h
index 3e156bc..972490f 100644
--- a/arch/arm/mach-exynos/common.h
+++ b/arch/arm/mach-exynos/common.h
@@ -97,6 +97,5 @@
 };
 
 extern void exynos_sys_powerdown_conf(enum sys_powerdown mode);
-extern void s3c_cpu_resume(void);
 
 #endif /* __ARCH_ARM_MACH_EXYNOS_COMMON_H */
diff --git a/arch/arm/mach-exynos/cpuidle.c b/arch/arm/mach-exynos/cpuidle.c
index 17a18ff..225ee84 100644
--- a/arch/arm/mach-exynos/cpuidle.c
+++ b/arch/arm/mach-exynos/cpuidle.c
@@ -25,6 +25,7 @@
 #include <mach/regs-pmu.h>
 
 #include <plat/cpu.h>
+#include <plat/pm.h>
 
 #include "common.h"
 
diff --git a/arch/arm/mach-exynos/include/mach/memory.h b/arch/arm/mach-exynos/include/mach/memory.h
index 374ef2c..2a4cdb7 100644
--- a/arch/arm/mach-exynos/include/mach/memory.h
+++ b/arch/arm/mach-exynos/include/mach/memory.h
@@ -15,8 +15,13 @@
 
 #define PLAT_PHYS_OFFSET		UL(0x40000000)
 
+#ifndef CONFIG_ARM_LPAE
 /* Maximum of 256MiB in one bank */
 #define MAX_PHYSMEM_BITS	32
 #define SECTION_SIZE_BITS	28
+#else
+#define MAX_PHYSMEM_BITS	36
+#define SECTION_SIZE_BITS	31
+#endif
 
 #endif /* __ASM_ARCH_MEMORY_H */
diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c
index 41c2069..c679db5 100644
--- a/arch/arm/mach-exynos/pm.c
+++ b/arch/arm/mach-exynos/pm.c
@@ -217,6 +217,9 @@
 	struct clk *pll_base;
 	unsigned int tmp;
 
+	if (soc_is_exynos5440())
+		return 0;
+
 	s3c_pm_init();
 
 	/* All wakeup disable */
@@ -340,6 +343,9 @@
 
 static __init int exynos_pm_syscore_init(void)
 {
+	if (soc_is_exynos5440())
+		return 0;
+
 	register_syscore_ops(&exynos_pm_syscore_ops);
 	return 0;
 }
diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c
index a7cd2cf..3490a24 100644
--- a/arch/arm/mach-footbridge/dc21285.c
+++ b/arch/arm/mach-footbridge/dc21285.c
@@ -276,8 +276,6 @@
 
 	sys->mem_offset  = DC21285_PCI_MEM;
 
-	pci_ioremap_io(0, DC21285_PCI_IO);
-
 	pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset);
 	pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset);
 
diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c
index dc5d6be..8881579 100644
--- a/arch/arm/mach-highbank/highbank.c
+++ b/arch/arm/mach-highbank/highbank.c
@@ -115,6 +115,7 @@
 {
 	struct resource *res;
 	int reg = -1;
+	u32 val;
 	struct device *dev = __dev;
 
 	if (event != BUS_NOTIFY_ADD_DEVICE)
@@ -141,10 +142,10 @@
 		return NOTIFY_DONE;
 
 	if (of_property_read_bool(dev->of_node, "dma-coherent")) {
-		writel(0xff31, sregs_base + reg);
+		val = readl(sregs_base + reg);
+		writel(val | 0xff01, sregs_base + reg);
 		set_dma_ops(dev, &arm_coherent_dma_ops);
-	} else
-		writel(0, sregs_base + reg);
+	}
 
 	return NOTIFY_OK;
 }
diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c
index 4282e99f..86567d9 100644
--- a/arch/arm/mach-imx/clk-imx6q.c
+++ b/arch/arm/mach-imx/clk-imx6q.c
@@ -199,7 +199,8 @@
 static const char *ssi_sels[]		= { "pll3_pfd2_508m", "pll3_pfd3_454m", "pll4_post_div", };
 static const char *usdhc_sels[]	= { "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *enfc_sels[]	= { "pll2_pfd0_352m", "pll2_bus", "pll3_usb_otg", "pll2_pfd2_396m", };
-static const char *emi_sels[]		= { "axi", "pll3_usb_otg", "pll2_pfd2_396m", "pll2_pfd0_352m", };
+static const char *emi_sels[]		= { "pll2_pfd2_396m", "pll3_usb_otg", "axi", "pll2_pfd0_352m", };
+static const char *emi_slow_sels[]      = { "axi", "pll3_usb_otg", "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *vdo_axi_sels[]	= { "axi", "ahb", };
 static const char *vpu_axi_sels[]	= { "axi", "pll2_pfd2_396m", "pll2_pfd0_352m", };
 static const char *cko1_sels[]	= { "pll3_usb_otg", "pll2_bus", "pll1_sys", "pll5_video_div",
@@ -392,7 +393,7 @@
 	clk[usdhc4_sel]       = imx_clk_mux("usdhc4_sel",       base + 0x1c, 19, 1, usdhc_sels,        ARRAY_SIZE(usdhc_sels));
 	clk[enfc_sel]         = imx_clk_mux("enfc_sel",         base + 0x2c, 16, 2, enfc_sels,         ARRAY_SIZE(enfc_sels));
 	clk[emi_sel]          = imx_clk_mux("emi_sel",          base + 0x1c, 27, 2, emi_sels,          ARRAY_SIZE(emi_sels));
-	clk[emi_slow_sel]     = imx_clk_mux("emi_slow_sel",     base + 0x1c, 29, 2, emi_sels,          ARRAY_SIZE(emi_sels));
+	clk[emi_slow_sel]     = imx_clk_mux("emi_slow_sel",     base + 0x1c, 29, 2, emi_slow_sels,     ARRAY_SIZE(emi_slow_sels));
 	clk[vdo_axi_sel]      = imx_clk_mux("vdo_axi_sel",      base + 0x18, 11, 1, vdo_axi_sels,      ARRAY_SIZE(vdo_axi_sels));
 	clk[vpu_axi_sel]      = imx_clk_mux("vpu_axi_sel",      base + 0x18, 14, 2, vpu_axi_sels,      ARRAY_SIZE(vpu_axi_sels));
 	clk[cko1_sel]         = imx_clk_mux("cko1_sel",         base + 0x60, 0,  4, cko1_sels,         ARRAY_SIZE(cko1_sels));
diff --git a/arch/arm/mach-imx/clk-vf610.c b/arch/arm/mach-imx/clk-vf610.c
index d617c0b..b169a39 100644
--- a/arch/arm/mach-imx/clk-vf610.c
+++ b/arch/arm/mach-imx/clk-vf610.c
@@ -183,6 +183,8 @@
 	clk[VF610_CLK_ENET_TS_SEL] = imx_clk_mux("enet_ts_sel", CCM_CSCMR2, 0, 3, enet_ts_sels, 7);
 	clk[VF610_CLK_ENET] = imx_clk_gate("enet", "enet_sel", CCM_CSCDR1, 24);
 	clk[VF610_CLK_ENET_TS] = imx_clk_gate("enet_ts", "enet_ts_sel", CCM_CSCDR1, 23);
+	clk[VF610_CLK_ENET0] = imx_clk_gate2("enet0", "ipg_bus", CCM_CCGR9, CCM_CCGRx_CGn(0));
+	clk[VF610_CLK_ENET1] = imx_clk_gate2("enet1", "ipg_bus", CCM_CCGR9, CCM_CCGRx_CGn(1));
 
 	clk[VF610_CLK_PIT] = imx_clk_gate2("pit", "ipg_bus", CCM_CCGR1, CCM_CCGRx_CGn(7));
 
diff --git a/arch/arm/mach-imx/mx27.h b/arch/arm/mach-imx/mx27.h
index e074616..8a65f19 100644
--- a/arch/arm/mach-imx/mx27.h
+++ b/arch/arm/mach-imx/mx27.h
@@ -135,7 +135,7 @@
 #define MX27_INT_GPT4		(NR_IRQS_LEGACY + 4)
 #define MX27_INT_RTIC		(NR_IRQS_LEGACY + 5)
 #define MX27_INT_CSPI3		(NR_IRQS_LEGACY + 6)
-#define MX27_INT_SDHC		(NR_IRQS_LEGACY + 7)
+#define MX27_INT_MSHC		(NR_IRQS_LEGACY + 7)
 #define MX27_INT_GPIO		(NR_IRQS_LEGACY + 8)
 #define MX27_INT_SDHC3		(NR_IRQS_LEGACY + 9)
 #define MX27_INT_SDHC2		(NR_IRQS_LEGACY + 10)
diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c
index fe4d9ff..b661c5c 100644
--- a/arch/arm/mach-keystone/keystone.c
+++ b/arch/arm/mach-keystone/keystone.c
@@ -49,7 +49,7 @@
 	NULL,
 };
 
-void keystone_restart(char mode, const char *cmd)
+void keystone_restart(enum reboot_mode mode, const char *cmd)
 {
 	u32 val;
 
diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig
index 614e41e..905efc8 100644
--- a/arch/arm/mach-msm/Kconfig
+++ b/arch/arm/mach-msm/Kconfig
@@ -121,8 +121,7 @@
 	bool
 
 config MSM_GPIOMUX
-	depends on !(ARCH_MSM8X60 || ARCH_MSM8960)
-	bool "MSM V1 TLMM GPIOMUX architecture"
+	bool
 	help
 	  Support for MSM V1 TLMM GPIOMUX architecture.
 
diff --git a/arch/arm/mach-msm/gpiomux-v1.c b/arch/arm/mach-msm/gpiomux-v1.c
deleted file mode 100644
index 27de2ab..0000000
--- a/arch/arm/mach-msm/gpiomux-v1.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/* Copyright (c) 2010, Code Aurora Forum. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-#include <linux/kernel.h>
-#include "gpiomux.h"
-#include "proc_comm.h"
-
-void __msm_gpiomux_write(unsigned gpio, gpiomux_config_t val)
-{
-	unsigned tlmm_config  = (val & ~GPIOMUX_CTL_MASK) |
-				((gpio & 0x3ff) << 4);
-	unsigned tlmm_disable = 0;
-	int rc;
-
-	rc = msm_proc_comm(PCOM_RPC_GPIO_TLMM_CONFIG_EX,
-			   &tlmm_config, &tlmm_disable);
-	if (rc)
-		pr_err("%s: unexpected proc_comm failure %d: %08x %08x\n",
-		       __func__, rc, tlmm_config, tlmm_disable);
-}
diff --git a/arch/arm/mach-msm/gpiomux.h b/arch/arm/mach-msm/gpiomux.h
index 8e82f41..4410d77 100644
--- a/arch/arm/mach-msm/gpiomux.h
+++ b/arch/arm/mach-msm/gpiomux.h
@@ -73,16 +73,6 @@
 int msm_gpiomux_write(unsigned gpio,
 		      gpiomux_config_t active,
 		      gpiomux_config_t suspended);
-
-/* Architecture-internal function for use by the framework only.
- * This function can assume the following:
- * - the gpio value has passed a bounds-check
- * - the gpiomux spinlock has been obtained
- *
- * This function is not for public consumption.  External users
- * should use msm_gpiomux_write.
- */
-void __msm_gpiomux_write(unsigned gpio, gpiomux_config_t val);
 #else
 static inline int msm_gpiomux_write(unsigned gpio,
 				    gpiomux_config_t active,
diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index 627fa7e..3eed000 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig
@@ -62,7 +62,7 @@
 	select HAVE_SMP
 	select COMMON_CLK
 	select HAVE_ARM_ARCH_TIMER
-	select ARM_ERRATA_798181
+	select ARM_ERRATA_798181 if SMP
 
 config SOC_AM33XX
 	bool "AM33XX support"
diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c
index e5fbfed..be5d005 100644
--- a/arch/arm/mach-omap2/board-generic.c
+++ b/arch/arm/mach-omap2/board-generic.c
@@ -15,6 +15,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/irqdomain.h>
+#include <linux/clk.h>
 
 #include <asm/mach/arch.h>
 
@@ -35,6 +36,21 @@
 	{ }
 };
 
+/*
+ * Create alias for USB host PHY clock.
+ * Remove this when clock phandle can be provided via DT
+ */
+static void __init legacy_init_ehci_clk(char *clkname)
+{
+	int ret;
+
+	ret = clk_add_alias("main_clk", NULL, clkname, NULL);
+	if (ret) {
+		pr_err("%s:Failed to add main_clk alias to %s :%d\n",
+						__func__, clkname, ret);
+	}
+}
+
 static void __init omap_generic_init(void)
 {
 	omap_sdrc_init(NULL, NULL);
@@ -45,10 +61,15 @@
 	 * HACK: call display setup code for selected boards to enable omapdss.
 	 * This will be removed when omapdss supports DT.
 	 */
-	if (of_machine_is_compatible("ti,omap4-panda"))
+	if (of_machine_is_compatible("ti,omap4-panda")) {
 		omap4_panda_display_init_of();
+		legacy_init_ehci_clk("auxclk3_ck");
+
+	}
 	else if (of_machine_is_compatible("ti,omap4-sdp"))
 		omap_4430sdp_display_init_of();
+	else if (of_machine_is_compatible("ti,omap5-uevm"))
+		legacy_init_ehci_clk("auxclk1_ck");
 }
 
 #ifdef CONFIG_SOC_OMAP2420
diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c
index f6eeb87..827d150 100644
--- a/arch/arm/mach-omap2/board-n8x0.c
+++ b/arch/arm/mach-omap2/board-n8x0.c
@@ -122,11 +122,7 @@
 };
 
 static struct musb_hdrc_platform_data tusb_data = {
-#ifdef CONFIG_USB_GADGET_MUSB_HDRC
 	.mode		= MUSB_OTG,
-#else
-	.mode		= MUSB_HOST,
-#endif
 	.set_power	= tusb_set_power,
 	.min_power	= 25,	/* x2 = 50 mA drawn from VBUS as peripheral */
 	.power		= 100,	/* Max 100 mA VBUS for host mode */
diff --git a/arch/arm/mach-omap2/board-rx51.c b/arch/arm/mach-omap2/board-rx51.c
index d2ea68e..773510556 100644
--- a/arch/arm/mach-omap2/board-rx51.c
+++ b/arch/arm/mach-omap2/board-rx51.c
@@ -85,7 +85,7 @@
 
 static struct omap_musb_board_data musb_board_data = {
 	.interface_type		= MUSB_INTERFACE_ULPI,
-	.mode			= MUSB_PERIPHERAL,
+	.mode			= MUSB_OTG,
 	.power			= 0,
 };
 
diff --git a/arch/arm/mach-omap2/dss-common.c b/arch/arm/mach-omap2/dss-common.c
index 393aeef..043e570 100644
--- a/arch/arm/mach-omap2/dss-common.c
+++ b/arch/arm/mach-omap2/dss-common.c
@@ -42,7 +42,7 @@
 
 /* Using generic display panel */
 static struct tfp410_platform_data omap4_dvi_panel = {
-	.i2c_bus_num		= 3,
+	.i2c_bus_num		= 2,
 	.power_down_gpio	= PANDA_DVI_TFP410_POWER_DOWN_GPIO,
 };
 
diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 5cc9287..f99f68e 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c
@@ -129,6 +129,7 @@
 	struct device_node *node = pdev->dev.of_node;
 	const char *oh_name;
 	int oh_cnt, i, ret = 0;
+	bool device_active = false;
 
 	oh_cnt = of_property_count_strings(node, "ti,hwmods");
 	if (oh_cnt <= 0) {
@@ -152,6 +153,8 @@
 			goto odbfd_exit1;
 		}
 		hwmods[i] = oh;
+		if (oh->flags & HWMOD_INIT_NO_IDLE)
+			device_active = true;
 	}
 
 	od = omap_device_alloc(pdev, hwmods, oh_cnt);
@@ -172,6 +175,11 @@
 
 	pdev->dev.pm_domain = &omap_device_pm_domain;
 
+	if (device_active) {
+		omap_device_enable(pdev);
+		pm_runtime_set_active(&pdev->dev);
+	}
+
 odbfd_exit1:
 	kfree(hwmods);
 odbfd_exit:
@@ -842,6 +850,7 @@
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct omap_device *od = to_omap_device(pdev);
+	int i;
 
 	if (!od)
 		return 0;
@@ -850,6 +859,15 @@
 	 * If omap_device state is enabled, but has no driver bound,
 	 * idle it.
 	 */
+
+	/*
+	 * Some devices (like memory controllers) are always kept
+	 * enabled, and should not be idled even with no drivers.
+	 */
+	for (i = 0; i < od->hwmods_cnt; i++)
+		if (od->hwmods[i]->flags & HWMOD_INIT_NO_IDLE)
+			return 0;
+
 	if (od->_driver_status != BUS_NOTIFY_BOUND_DRIVER) {
 		if (od->_state == OMAP_DEVICE_STATE_ENABLED) {
 			dev_warn(dev, "%s: enabled but no driver.  Idling\n",
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 7341eff..7f4db12 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -2386,7 +2386,7 @@
 
 		np = of_dev_hwmod_lookup(of_find_node_by_name(NULL, "ocp"), oh);
 		if (np)
-			va_start = of_iomap(np, 0);
+			va_start = of_iomap(np, oh->mpu_rt_idx);
 	} else {
 		va_start = ioremap(mem->pa_start, mem->pa_end - mem->pa_start);
 	}
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index aab33fd..e1482a9 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -95,6 +95,54 @@
 #define MODULEMODE_HWCTRL		1
 #define MODULEMODE_SWCTRL		2
 
+#define DEBUG_OMAP2UART1_FLAGS	0
+#define DEBUG_OMAP2UART2_FLAGS	0
+#define DEBUG_OMAP2UART3_FLAGS	0
+#define DEBUG_OMAP3UART3_FLAGS	0
+#define DEBUG_OMAP3UART4_FLAGS	0
+#define DEBUG_OMAP4UART3_FLAGS	0
+#define DEBUG_OMAP4UART4_FLAGS	0
+#define DEBUG_TI81XXUART1_FLAGS	0
+#define DEBUG_TI81XXUART2_FLAGS	0
+#define DEBUG_TI81XXUART3_FLAGS	0
+#define DEBUG_AM33XXUART1_FLAGS	0
+
+#define DEBUG_OMAPUART_FLAGS	(HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET)
+
+#if defined(CONFIG_DEBUG_OMAP2UART1)
+#undef DEBUG_OMAP2UART1_FLAGS
+#define DEBUG_OMAP2UART1_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_OMAP2UART2)
+#undef DEBUG_OMAP2UART2_FLAGS
+#define DEBUG_OMAP2UART2_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_OMAP2UART3)
+#undef DEBUG_OMAP2UART3_FLAGS
+#define DEBUG_OMAP2UART3_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_OMAP3UART3)
+#undef DEBUG_OMAP3UART3_FLAGS
+#define DEBUG_OMAP3UART3_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_OMAP3UART4)
+#undef DEBUG_OMAP3UART4_FLAGS
+#define DEBUG_OMAP3UART4_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_OMAP4UART3)
+#undef DEBUG_OMAP4UART3_FLAGS
+#define DEBUG_OMAP4UART3_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_OMAP4UART4)
+#undef DEBUG_OMAP4UART4_FLAGS
+#define DEBUG_OMAP4UART4_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_TI81XXUART1)
+#undef DEBUG_TI81XXUART1_FLAGS
+#define DEBUG_TI81XXUART1_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_TI81XXUART2)
+#undef DEBUG_TI81XXUART2_FLAGS
+#define DEBUG_TI81XXUART2_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_TI81XXUART3)
+#undef DEBUG_TI81XXUART3_FLAGS
+#define DEBUG_TI81XXUART3_FLAGS DEBUG_OMAPUART_FLAGS
+#elif defined(CONFIG_DEBUG_AM33XXUART1)
+#undef DEBUG_AM33XXUART1_FLAGS
+#define DEBUG_AM33XXUART1_FLAGS DEBUG_OMAPUART_FLAGS
+#endif
 
 /**
  * struct omap_hwmod_mux_info - hwmod specific mux configuration
@@ -568,6 +616,7 @@
  * @voltdm: pointer to voltage domain (filled in at runtime)
  * @dev_attr: arbitrary device attributes that can be passed to the driver
  * @_sysc_cache: internal-use hwmod flags
+ * @mpu_rt_idx: index of device address space for register target (for DT boot)
  * @_mpu_rt_va: cached register target start address (internal use)
  * @_mpu_port: cached MPU register target slave (internal use)
  * @opt_clks_cnt: number of @opt_clks
@@ -617,6 +666,7 @@
 	struct list_head		node;
 	struct omap_hwmod_ocp_if	*_mpu_port;
 	u16				flags;
+	u8				mpu_rt_idx;
 	u8				response_lat;
 	u8				rst_lines_cnt;
 	u8				opt_clks_cnt;
diff --git a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
index d05fc7b..56cebb0 100644
--- a/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_2xxx_ipblock_data.c
@@ -512,7 +512,7 @@
 	.mpu_irqs	= omap2_uart1_mpu_irqs,
 	.sdma_reqs	= omap2_uart1_sdma_reqs,
 	.main_clk	= "uart1_fck",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_OMAP2UART1_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.prcm		= {
 		.omap2 = {
 			.module_offs = CORE_MOD,
@@ -532,7 +532,7 @@
 	.mpu_irqs	= omap2_uart2_mpu_irqs,
 	.sdma_reqs	= omap2_uart2_sdma_reqs,
 	.main_clk	= "uart2_fck",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_OMAP2UART2_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.prcm		= {
 		.omap2 = {
 			.module_offs = CORE_MOD,
@@ -552,7 +552,7 @@
 	.mpu_irqs	= omap2_uart3_mpu_irqs,
 	.sdma_reqs	= omap2_uart3_sdma_reqs,
 	.main_clk	= "uart3_fck",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_OMAP2UART3_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.prcm		= {
 		.omap2 = {
 			.module_offs = CORE_MOD,
diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
index 28bbd56..eb2f3b9 100644
--- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c
@@ -562,6 +562,7 @@
 	.clkdm_name	= "cpsw_125mhz_clkdm",
 	.flags		= (HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY),
 	.main_clk	= "cpsw_125mhz_gclk",
+	.mpu_rt_idx	= 1,
 	.prcm		= {
 		.omap4	= {
 			.clkctrl_offs	= AM33XX_CM_PER_CPGMAC0_CLKCTRL_OFFSET,
@@ -1512,7 +1513,7 @@
 	.name		= "uart1",
 	.class		= &uart_class,
 	.clkdm_name	= "l4_wkup_clkdm",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_AM33XXUART1_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.main_clk	= "dpll_per_m2_div4_wkupdm_ck",
 	.prcm		= {
 		.omap4	= {
diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
index f7a3df2..0c3a427 100644
--- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c
@@ -490,7 +490,7 @@
 	.mpu_irqs	= omap2_uart1_mpu_irqs,
 	.sdma_reqs	= omap2_uart1_sdma_reqs,
 	.main_clk	= "uart1_fck",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_TI81XXUART1_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.prcm		= {
 		.omap2 = {
 			.module_offs = CORE_MOD,
@@ -509,7 +509,7 @@
 	.mpu_irqs	= omap2_uart2_mpu_irqs,
 	.sdma_reqs	= omap2_uart2_sdma_reqs,
 	.main_clk	= "uart2_fck",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_TI81XXUART2_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.prcm		= {
 		.omap2 = {
 			.module_offs = CORE_MOD,
@@ -528,7 +528,8 @@
 	.mpu_irqs	= omap2_uart3_mpu_irqs,
 	.sdma_reqs	= omap2_uart3_sdma_reqs,
 	.main_clk	= "uart3_fck",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_OMAP3UART3_FLAGS | DEBUG_TI81XXUART3_FLAGS |
+				HWMOD_SWSUP_SIDLE_ACT,
 	.prcm		= {
 		.omap2 = {
 			.module_offs = OMAP3430_PER_MOD,
@@ -558,7 +559,7 @@
 	.mpu_irqs	= uart4_mpu_irqs,
 	.sdma_reqs	= uart4_sdma_reqs,
 	.main_clk	= "uart4_fck",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_OMAP3UART4_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.prcm		= {
 		.omap2 = {
 			.module_offs = OMAP3430_PER_MOD,
diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
index d04b5e6..9c3b504 100644
--- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c
@@ -2858,8 +2858,7 @@
 	.name		= "uart3",
 	.class		= &omap44xx_uart_hwmod_class,
 	.clkdm_name	= "l4_per_clkdm",
-	.flags		= HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET |
-				HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_OMAP4UART3_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.main_clk	= "func_48m_fclk",
 	.prcm = {
 		.omap4 = {
@@ -2875,7 +2874,7 @@
 	.name		= "uart4",
 	.class		= &omap44xx_uart_hwmod_class,
 	.clkdm_name	= "l4_per_clkdm",
-	.flags		= HWMOD_SWSUP_SIDLE_ACT,
+	.flags		= DEBUG_OMAP4UART4_FLAGS | HWMOD_SWSUP_SIDLE_ACT,
 	.main_clk	= "func_48m_fclk",
 	.prcm = {
 		.omap4 = {
diff --git a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
index f37ae96..3c70f5c 100644
--- a/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_54xx_data.c
@@ -1375,7 +1375,7 @@
 	.name		= "uart3",
 	.class		= &omap54xx_uart_hwmod_class,
 	.clkdm_name	= "l4per_clkdm",
-	.flags		= HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET,
+	.flags		= DEBUG_OMAP4UART3_FLAGS,
 	.main_clk	= "func_48m_fclk",
 	.prcm = {
 		.omap4 = {
@@ -1391,6 +1391,7 @@
 	.name		= "uart4",
 	.class		= &omap54xx_uart_hwmod_class,
 	.clkdm_name	= "l4per_clkdm",
+	.flags		= DEBUG_OMAP4UART4_FLAGS,
 	.main_clk	= "func_48m_fclk",
 	.prcm = {
 		.omap4 = {
diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c
index 3a674de..a388f8c 100644
--- a/arch/arm/mach-omap2/serial.c
+++ b/arch/arm/mach-omap2/serial.c
@@ -208,17 +208,6 @@
 				pr_info("%s used as console in debug mode: uart%d clocks will not be gated",
 					uart_name, uart->num);
 			}
-
-			/*
-			 * omap-uart can be used for earlyprintk logs
-			 * So if omap-uart is used as console then prevent
-			 * uart reset and idle to get logs from omap-uart
-			 * until uart console driver is available to take
-			 * care for console messages.
-			 * Idling or resetting omap-uart while printing logs
-			 * early boot logs can stall the boot-up.
-			 */
-			oh->flags |= HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET;
 		}
 	} while (1);
 
diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c
index 8c4de27..bc89723 100644
--- a/arch/arm/mach-omap2/usb-musb.c
+++ b/arch/arm/mach-omap2/usb-musb.c
@@ -38,11 +38,8 @@
 };
 
 static struct musb_hdrc_platform_data musb_plat = {
-#ifdef CONFIG_USB_GADGET_MUSB_HDRC
 	.mode		= MUSB_OTG,
-#else
-	.mode		= MUSB_HOST,
-#endif
+
 	/* .clock is set dynamically */
 	.config		= &musb_config,
 
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index f6726bb..3a3362f 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -477,16 +477,24 @@
 	/* USB Hub power-on and reset */
 	gpio_direction_output(usb_hub_reset, 1);
 	gpio_direction_output(GPIO9_USB_VBUS_EN, 0);
-	regulator_enable(em_x270_usb_ldo);
+	err = regulator_enable(em_x270_usb_ldo);
+	if (err)
+		goto err_free_rst_gpio;
+
 	gpio_set_value(usb_hub_reset, 0);
 	gpio_set_value(usb_hub_reset, 1);
 	regulator_disable(em_x270_usb_ldo);
-	regulator_enable(em_x270_usb_ldo);
+	err = regulator_enable(em_x270_usb_ldo);
+	if (err)
+		goto err_free_rst_gpio;
+
 	gpio_set_value(usb_hub_reset, 0);
 	gpio_set_value(GPIO9_USB_VBUS_EN, 1);
 
 	return 0;
 
+err_free_rst_gpio:
+	gpio_free(usb_hub_reset);
 err_free_vbus_gpio:
 	gpio_free(GPIO9_USB_VBUS_EN);
 err_free_usb_ldo:
@@ -592,7 +600,7 @@
 	return err;
 }
 
-static void em_x270_mci_setpower(struct device *dev, unsigned int vdd)
+static int em_x270_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -600,10 +608,11 @@
 		int vdd_uV = (2000 + (vdd - __ffs(MMC_VDD_20_21)) * 100) * 1000;
 
 		regulator_set_voltage(em_x270_sdio_ldo, vdd_uV, vdd_uV);
-		regulator_enable(em_x270_sdio_ldo);
+		return regulator_enable(em_x270_sdio_ldo);
 	} else {
 		regulator_disable(em_x270_sdio_ldo);
 	}
+	return 0;
 }
 
 static void em_x270_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/mainstone.c b/arch/arm/mach-pxa/mainstone.c
index d2c6523..dd70343 100644
--- a/arch/arm/mach-pxa/mainstone.c
+++ b/arch/arm/mach-pxa/mainstone.c
@@ -408,7 +408,7 @@
 	return err;
 }
 
-static void mainstone_mci_setpower(struct device *dev, unsigned int vdd)
+static int mainstone_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -420,6 +420,7 @@
 		printk(KERN_DEBUG "%s: off\n", __func__);
 		MST_MSCWR1 &= ~MST_MSCWR1_MMC_ON;
 	}
+	return 0;
 }
 
 static void mainstone_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/pcm990-baseboard.c b/arch/arm/mach-pxa/pcm990-baseboard.c
index fb7f1d1..13e5b00 100644
--- a/arch/arm/mach-pxa/pcm990-baseboard.c
+++ b/arch/arm/mach-pxa/pcm990-baseboard.c
@@ -335,7 +335,7 @@
 	return err;
 }
 
-static void pcm990_mci_setpower(struct device *dev, unsigned int vdd)
+static int pcm990_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data *p_d = dev->platform_data;
 	u8 val;
@@ -348,6 +348,7 @@
 		val &= ~PCM990_CTRL_MMC2PWR;
 
 	pcm990_cpld_writeb(PCM990_CTRL_MMC2PWR, PCM990_CTRL_REG5);
+	return 0;
 }
 
 static void pcm990_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c
index 711d37e..aedf053 100644
--- a/arch/arm/mach-pxa/poodle.c
+++ b/arch/arm/mach-pxa/poodle.c
@@ -258,7 +258,7 @@
 	return err;
 }
 
-static void poodle_mci_setpower(struct device *dev, unsigned int vdd)
+static int poodle_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -270,6 +270,8 @@
 		gpio_set_value(POODLE_GPIO_SD_PWR1, 0);
 		gpio_set_value(POODLE_GPIO_SD_PWR, 0);
 	}
+
+	return 0;
 }
 
 static void poodle_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c
index 2125df0..4c29173 100644
--- a/arch/arm/mach-pxa/spitz.c
+++ b/arch/arm/mach-pxa/spitz.c
@@ -598,7 +598,7 @@
  * NOTE: The card detect interrupt isn't debounced so we delay it by 250ms to
  * give the card a chance to fully insert/eject.
  */
-static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
+static int spitz_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	struct pxamci_platform_data* p_d = dev->platform_data;
 
@@ -606,6 +606,8 @@
 		spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, SCOOP_CPR_SD_3V);
 	else
 		spitz_card_pwr_ctrl(SCOOP_CPR_SD_3V, 0x0);
+
+	return 0;
 }
 
 static struct pxamci_platform_data spitz_mci_platform_data = {
diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c
index 88fde43..62aea3e 100644
--- a/arch/arm/mach-pxa/stargate2.c
+++ b/arch/arm/mach-pxa/stargate2.c
@@ -734,9 +734,10 @@
  *
  * Very simple control. Either it is on or off and is controlled by
  * a gpio pin */
-static void stargate2_mci_setpower(struct device *dev, unsigned int vdd)
+static int stargate2_mci_setpower(struct device *dev, unsigned int vdd)
 {
 	gpio_set_value(SG2_SD_POWER_ENABLE, !!vdd);
+	return 0;
 }
 
 static void stargate2_mci_exit(struct device *dev, void *data)
diff --git a/arch/arm/mach-s3c24xx/clock-s3c2410.c b/arch/arm/mach-s3c24xx/clock-s3c2410.c
index 34fffdf..5645536 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2410.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2410.c
@@ -119,66 +119,101 @@
 	}
 };
 
-static struct clk init_clocks[] = {
-	{
-		.name		= "lcd",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_LCDC,
-	}, {
-		.name		= "gpio",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_GPIO,
-	}, {
-		.name		= "usb-host",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_USBH,
-	}, {
-		.name		= "usb-device",
-		.parent		= &clk_h,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_USBD,
-	}, {
-		.name		= "timers",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_PWMT,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.0",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART0,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.1",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART1,
-	}, {
-		.name		= "uart",
-		.devname	= "s3c2410-uart.2",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_UART2,
-	}, {
-		.name		= "rtc",
-		.parent		= &clk_p,
-		.enable		= s3c2410_clkcon_enable,
-		.ctrlbit	= S3C2410_CLKCON_RTC,
-	}, {
-		.name		= "watchdog",
-		.parent		= &clk_p,
-		.ctrlbit	= 0,
-	}, {
-		.name		= "usb-bus-host",
-		.parent		= &clk_usb_bus,
-	}, {
-		.name		= "usb-bus-gadget",
-		.parent		= &clk_usb_bus,
-	},
+static struct clk clk_lcd = {
+	.name		= "lcd",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_LCDC,
+};
+
+static struct clk clk_gpio = {
+	.name		= "gpio",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_GPIO,
+};
+
+static struct clk clk_usb_host = {
+	.name		= "usb-host",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_USBH,
+};
+
+static struct clk clk_usb_device = {
+	.name		= "usb-device",
+	.parent		= &clk_h,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_USBD,
+};
+
+static struct clk clk_timers = {
+	.name		= "timers",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_PWMT,
+};
+
+struct clk s3c24xx_clk_uart0 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.0",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART0,
+};
+
+struct clk s3c24xx_clk_uart1 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.1",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART1,
+};
+
+struct clk s3c24xx_clk_uart2 = {
+	.name		= "uart",
+	.devname	= "s3c2410-uart.2",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_UART2,
+};
+
+static struct clk clk_rtc = {
+	.name		= "rtc",
+	.parent		= &clk_p,
+	.enable		= s3c2410_clkcon_enable,
+	.ctrlbit	= S3C2410_CLKCON_RTC,
+};
+
+static struct clk clk_watchdog = {
+	.name		= "watchdog",
+	.parent		= &clk_p,
+	.ctrlbit	= 0,
+};
+
+static struct clk clk_usb_bus_host = {
+	.name		= "usb-bus-host",
+	.parent		= &clk_usb_bus,
+};
+
+static struct clk clk_usb_bus_gadget = {
+	.name		= "usb-bus-gadget",
+	.parent		= &clk_usb_bus,
+};
+
+static struct clk *init_clocks[] = {
+	&clk_lcd,
+	&clk_gpio,
+	&clk_usb_host,
+	&clk_usb_device,
+	&clk_timers,
+	&s3c24xx_clk_uart0,
+	&s3c24xx_clk_uart1,
+	&s3c24xx_clk_uart2,
+	&clk_rtc,
+	&clk_watchdog,
+	&clk_usb_bus_host,
+	&clk_usb_bus_gadget,
 };
 
 /* s3c2410_baseclk_add()
@@ -195,7 +230,6 @@
 {
 	unsigned long clkslow = __raw_readl(S3C2410_CLKSLOW);
 	unsigned long clkcon  = __raw_readl(S3C2410_CLKCON);
-	struct clk *clkp;
 	struct clk *xtal;
 	int ret;
 	int ptr;
@@ -207,8 +241,9 @@
 
 	/* register clocks from clock array */
 
-	clkp = init_clocks;
-	for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++, clkp++) {
+	for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++) {
+		struct clk *clkp = init_clocks[ptr];
+
 		/* ensure that we note the clock state */
 
 		clkp->usage = clkcon & clkp->ctrlbit ? 1 : 0;
diff --git a/arch/arm/mach-s3c24xx/clock-s3c2440.c b/arch/arm/mach-s3c24xx/clock-s3c2440.c
index 1069b56..aaf006d 100644
--- a/arch/arm/mach-s3c24xx/clock-s3c2440.c
+++ b/arch/arm/mach-s3c24xx/clock-s3c2440.c
@@ -166,6 +166,9 @@
 	CLKDEV_INIT(NULL, "clk_uart_baud1", &s3c24xx_uclk),
 	CLKDEV_INIT(NULL, "clk_uart_baud2", &clk_p),
 	CLKDEV_INIT(NULL, "clk_uart_baud3", &s3c2440_clk_fclk_n),
+	CLKDEV_INIT("s3c2440-uart.0", "uart", &s3c24xx_clk_uart0),
+	CLKDEV_INIT("s3c2440-uart.1", "uart", &s3c24xx_clk_uart1),
+	CLKDEV_INIT("s3c2440-uart.2", "uart", &s3c24xx_clk_uart2),
 	CLKDEV_INIT("s3c2440-camif", "camera", &s3c2440_clk_cam_upll),
 };
 
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index e115f67..c5be60d 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -1162,9 +1162,6 @@
 	gpio_request_one(61, GPIOF_OUT_INIT_HIGH, NULL); /* LCDDON */
 	gpio_request_one(202, GPIOF_OUT_INIT_LOW, NULL); /* LCD0_LED_CONT */
 
-	/* Touchscreen */
-	gpio_request_one(166, GPIOF_OUT_INIT_HIGH, NULL); /* TP_RST_B */
-
 	/* GETHER */
 	gpio_request_one(18, GPIOF_OUT_INIT_HIGH, NULL); /* PHY_RST */
 
diff --git a/arch/arm/mach-shmobile/board-bockw.c b/arch/arm/mach-shmobile/board-bockw.c
index d555464..3354a85 100644
--- a/arch/arm/mach-shmobile/board-bockw.c
+++ b/arch/arm/mach-shmobile/board-bockw.c
@@ -167,7 +167,13 @@
 				  "usb1", "usb1"),
 	/* SDHI0 */
 	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-r8a7778",
-				  "sdhi0", "sdhi0"),
+				  "sdhi0_data4", "sdhi0"),
+	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-r8a7778",
+				  "sdhi0_ctrl", "sdhi0"),
+	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-r8a7778",
+				  "sdhi0_cd", "sdhi0"),
+	PIN_MAP_MUX_GROUP_DEFAULT("sh_mobile_sdhi.0", "pfc-r8a7778",
+				  "sdhi0_wp", "sdhi0"),
 };
 
 #define FPGA	0x18200000
diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c
index d73e21d..8d6bd5c 100644
--- a/arch/arm/mach-shmobile/board-lager.c
+++ b/arch/arm/mach-shmobile/board-lager.c
@@ -59,7 +59,7 @@
 #define GPIO_KEY(c, g, d, ...) \
 	{ .code = c, .gpio = g, .desc = d, .active_low = 1 }
 
-static __initdata struct gpio_keys_button gpio_buttons[] = {
+static struct gpio_keys_button gpio_buttons[] = {
 	GPIO_KEY(KEY_4,		RCAR_GP_PIN(1, 28),	"SW2-pin4"),
 	GPIO_KEY(KEY_3,		RCAR_GP_PIN(1, 26),	"SW2-pin3"),
 	GPIO_KEY(KEY_2,		RCAR_GP_PIN(1, 24),	"SW2-pin2"),
diff --git a/arch/arm/mach-sti/Kconfig b/arch/arm/mach-sti/Kconfig
index d04e3bf..835833e 100644
--- a/arch/arm/mach-sti/Kconfig
+++ b/arch/arm/mach-sti/Kconfig
@@ -11,8 +11,9 @@
 	select HAVE_SMP
 	select HAVE_ARM_SCU if SMP
 	select ARCH_REQUIRE_GPIOLIB
-	select ARM_ERRATA_720789
 	select ARM_ERRATA_754322
+	select ARM_ERRATA_764369
+	select ARM_ERRATA_775420
 	select PL310_ERRATA_753970 if CACHE_PL310
 	select PL310_ERRATA_769419 if CACHE_PL310
 	help
diff --git a/arch/arm/mach-sti/headsmp.S b/arch/arm/mach-sti/headsmp.S
index 78ebc75..4c09bae 100644
--- a/arch/arm/mach-sti/headsmp.S
+++ b/arch/arm/mach-sti/headsmp.S
@@ -16,8 +16,6 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 
-	__INIT
-
 /*
  * ST specific entry point for secondary CPUs.  This provides
  * a "holding pen" into which all secondary cores are held until we're
diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c
index 5b799c2..5f25256 100644
--- a/arch/arm/mach-zynq/common.c
+++ b/arch/arm/mach-zynq/common.c
@@ -91,7 +91,7 @@
 	zynq_scu_map_io();
 }
 
-static void zynq_system_reset(char mode, const char *cmd)
+static void zynq_system_reset(enum reboot_mode mode, const char *cmd)
 {
 	zynq_slcr_system_reset();
 }
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 6cacdc8..cd2c88e 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -421,24 +421,28 @@
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v4
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v4T
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v5
 	bool
 	select CPU_USE_DOMAINS if MMU
 	select NEEDS_SYSCALL_FOR_CMPXCHG if SMP
 	select TLS_REG_EMUL if SMP || !MMU
+	select NEED_KUSER_HELPERS
 
 config CPU_32v6
 	bool
@@ -776,6 +780,7 @@
 
 config TLS_REG_EMUL
 	bool
+	select NEED_KUSER_HELPERS
 	help
 	  An SMP system using a pre-ARMv6 processor (there are apparently
 	  a few prototypes like that in existence) and therefore access to
@@ -783,11 +788,43 @@
 
 config NEEDS_SYSCALL_FOR_CMPXCHG
 	bool
+	select NEED_KUSER_HELPERS
 	help
 	  SMP on a pre-ARMv6 processor?  Well OK then.
 	  Forget about fast user space cmpxchg support.
 	  It is just not possible.
 
+config NEED_KUSER_HELPERS
+	bool
+
+config KUSER_HELPERS
+	bool "Enable kuser helpers in vector page" if !NEED_KUSER_HELPERS
+	default y
+	help
+	  Warning: disabling this option may break user programs.
+
+	  Provide kuser helpers in the vector page.  The kernel provides
+	  helper code to userspace in read only form at a fixed location
+	  in the high vector page to allow userspace to be independent of
+	  the CPU type fitted to the system.  This permits binaries to be
+	  run on ARMv4 through to ARMv7 without modification.
+
+	  See Documentation/arm/kernel_user_helpers.txt for details.
+
+	  However, the fixed address nature of these helpers can be used
+	  by ROP (return orientated programming) authors when creating
+	  exploits.
+
+	  If all of the binaries and libraries which run on your platform
+	  are built specifically for your platform, and make no use of
+	  these helpers, then you can turn this option off to hinder
+	  such exploits. However, in that case, if a binary or library
+	  relying on those helpers is run, it will receive a SIGILL signal,
+	  which will terminate the program.
+
+	  Say N here only if you are absolutely certain that you do not
+	  need these helpers; otherwise, the safe option is to say Y.
+
 config DMA_CACHE_RWFO
 	bool "Enable read/write for ownership DMA cache maintenance"
 	depends on CPU_V6K && SMP
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index b55b101..4a05444 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -245,7 +245,8 @@
 	if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) {
 		local_flush_bp_all();
 		local_flush_tlb_all();
-		dummy_flush_tlb_a15_erratum();
+		if (erratum_a15_798181())
+			dummy_flush_tlb_a15_erratum();
 	}
 
 	atomic64_set(&per_cpu(active_asids, cpu), asid);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 4f56617..53cdbd3 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -989,6 +989,7 @@
 
 void __init sanity_check_meminfo(void)
 {
+	phys_addr_t memblock_limit = 0;
 	int i, j, highmem = 0;
 	phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1;
 
@@ -1052,9 +1053,32 @@
 			bank->size = size_limit;
 		}
 #endif
-		if (!bank->highmem && bank->start + bank->size > arm_lowmem_limit)
-			arm_lowmem_limit = bank->start + bank->size;
+		if (!bank->highmem) {
+			phys_addr_t bank_end = bank->start + bank->size;
 
+			if (bank_end > arm_lowmem_limit)
+				arm_lowmem_limit = bank_end;
+
+			/*
+			 * Find the first non-section-aligned page, and point
+			 * memblock_limit at it. This relies on rounding the
+			 * limit down to be section-aligned, which happens at
+			 * the end of this function.
+			 *
+			 * With this algorithm, the start or end of almost any
+			 * bank can be non-section-aligned. The only exception
+			 * is that the start of the bank 0 must be section-
+			 * aligned, since otherwise memory would need to be
+			 * allocated when mapping the start of bank 0, which
+			 * occurs before any free memory is mapped.
+			 */
+			if (!memblock_limit) {
+				if (!IS_ALIGNED(bank->start, SECTION_SIZE))
+					memblock_limit = bank->start;
+				else if (!IS_ALIGNED(bank_end, SECTION_SIZE))
+					memblock_limit = bank_end;
+			}
+		}
 		j++;
 	}
 #ifdef CONFIG_HIGHMEM
@@ -1079,7 +1103,18 @@
 #endif
 	meminfo.nr_banks = j;
 	high_memory = __va(arm_lowmem_limit - 1) + 1;
-	memblock_set_current_limit(arm_lowmem_limit);
+
+	/*
+	 * Round the memblock limit down to a section size.  This
+	 * helps to ensure that we will allocate memory from the
+	 * last full section, which should be mapped.
+	 */
+	if (memblock_limit)
+		memblock_limit = round_down(memblock_limit, SECTION_SIZE);
+	if (!memblock_limit)
+		memblock_limit = arm_lowmem_limit;
+
+	memblock_set_current_limit(memblock_limit);
 }
 
 static inline void prepare_page_table(void)
@@ -1160,7 +1195,7 @@
 	/*
 	 * Allocate the vector page early.
 	 */
-	vectors = early_alloc(PAGE_SIZE);
+	vectors = early_alloc(PAGE_SIZE * 2);
 
 	early_trap_init(vectors);
 
@@ -1205,15 +1240,27 @@
 	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
 	map.virtual = 0xffff0000;
 	map.length = PAGE_SIZE;
+#ifdef CONFIG_KUSER_HELPERS
 	map.type = MT_HIGH_VECTORS;
+#else
+	map.type = MT_LOW_VECTORS;
+#endif
 	create_mapping(&map);
 
 	if (!vectors_high()) {
 		map.virtual = 0;
+		map.length = PAGE_SIZE * 2;
 		map.type = MT_LOW_VECTORS;
 		create_mapping(&map);
 	}
 
+	/* Now create a kernel read-only mapping */
+	map.pfn += 1;
+	map.virtual = 0xffff0000 + PAGE_SIZE;
+	map.length = PAGE_SIZE;
+	map.type = MT_LOW_VECTORS;
+	create_mapping(&map);
+
 	/*
 	 * Ask the machine support to map in the statically mapped devices.
 	 */
@@ -1276,8 +1323,6 @@
 {
 	void *zero_page;
 
-	memblock_set_current_limit(arm_lowmem_limit);
-
 	build_mem_type_table();
 	prepare_page_table();
 	map_lowmem();
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index f64afb9..bdd3be4 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -110,7 +110,7 @@
  ARM(	str	r3, [r0, #2048]! )
  THUMB(	add	r0, r0, #2048 )
  THUMB(	str	r3, [r0] )
-	ALT_SMP(mov	pc,lr)
+	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
 	mov	pc, lr
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index c36ac69..01a719e 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -81,7 +81,7 @@
 	tst	r3, #1 << (55 - 32)		@ L_PTE_DIRTY
 	orreq	r2, #L_PTE_RDONLY
 1:	strd	r2, r3, [r0]
-	ALT_SMP(mov	pc, lr)
+	ALT_SMP(W(nop))
 	ALT_UP (mcr	p15, 0, r0, c7, c10, 1)		@ flush_pte
 #endif
 	mov	pc, lr
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 5c6d5a3..73398bc 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -75,13 +75,14 @@
 ENDPROC(cpu_v7_do_idle)
 
 ENTRY(cpu_v7_dcache_clean_area)
-	ALT_SMP(mov	pc, lr)			@ MP extensions imply L1 PTW
-	ALT_UP(W(nop))
-	dcache_line_size r2, r3
-1:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
+	ALT_SMP(W(nop))			@ MP extensions imply L1 PTW
+	ALT_UP_B(1f)
+	mov	pc, lr
+1:	dcache_line_size r2, r3
+2:	mcr	p15, 0, r0, c7, c10, 1		@ clean D entry
 	add	r0, r0, r2
 	subs	r1, r1, r2
-	bhi	1b
+	bhi	2b
 	dsb
 	mov	pc, lr
 ENDPROC(cpu_v7_dcache_clean_area)
diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig
index 3dc5cbe..a5b5ff6 100644
--- a/arch/arm/plat-samsung/Kconfig
+++ b/arch/arm/plat-samsung/Kconfig
@@ -29,6 +29,13 @@
 	help
 	  Base platform code for Samsung's S5P series SoC.
 
+config SAMSUNG_PM
+	bool
+	depends on PM && (PLAT_S3C24XX || ARCH_S3C64XX || ARCH_S5P64X0 || S5P_PM)
+	default y
+	help
+	  Base platform power management code for samsung code
+
 if PLAT_SAMSUNG
 
 # boot configurations
diff --git a/arch/arm/plat-samsung/Makefile b/arch/arm/plat-samsung/Makefile
index 98d07d8..199bbe3 100644
--- a/arch/arm/plat-samsung/Makefile
+++ b/arch/arm/plat-samsung/Makefile
@@ -51,7 +51,7 @@
 
 # PM support
 
-obj-$(CONFIG_PM)		+= pm.o
+obj-$(CONFIG_SAMSUNG_PM)	+= pm.o
 obj-$(CONFIG_SAMSUNG_PM_GPIO)	+= pm-gpio.o
 obj-$(CONFIG_SAMSUNG_PM_CHECK)	+= pm-check.o
 
diff --git a/arch/arm/plat-samsung/include/plat/clock.h b/arch/arm/plat-samsung/include/plat/clock.h
index a62753d..df45d6e 100644
--- a/arch/arm/plat-samsung/include/plat/clock.h
+++ b/arch/arm/plat-samsung/include/plat/clock.h
@@ -83,6 +83,11 @@
 extern struct clksrc_clk clk_epllref;
 extern struct clksrc_clk clk_esysclk;
 
+/* S3C24XX UART clocks */
+extern struct clk s3c24xx_clk_uart0;
+extern struct clk s3c24xx_clk_uart1;
+extern struct clk s3c24xx_clk_uart2;
+
 /* S3C64XX specific clocks */
 extern struct clk clk_h2;
 extern struct clk clk_27m;
diff --git a/arch/arm/plat-samsung/include/plat/pm.h b/arch/arm/plat-samsung/include/plat/pm.h
index 5d47ca3..6bc1a8f 100644
--- a/arch/arm/plat-samsung/include/plat/pm.h
+++ b/arch/arm/plat-samsung/include/plat/pm.h
@@ -19,7 +19,7 @@
 
 struct device;
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_SAMSUNG_PM
 
 extern __init int s3c_pm_init(void);
 extern __init int s3c64xx_pm_init(void);
@@ -58,8 +58,6 @@
 
 /* from sleep.S */
 
-extern void s3c_cpu_resume(void);
-
 extern int s3c2410_cpu_suspend(unsigned long);
 
 /* sleep save info */
@@ -106,12 +104,14 @@
 extern void s3c_pm_do_restore(struct sleep_save *ptr, int count);
 extern void s3c_pm_do_restore_core(struct sleep_save *ptr, int count);
 
-#ifdef CONFIG_PM
+#ifdef CONFIG_SAMSUNG_PM
 extern int s3c_irq_wake(struct irq_data *data, unsigned int state);
 extern int s3c_irqext_wake(struct irq_data *data, unsigned int state);
+extern void s3c_cpu_resume(void);
 #else
 #define s3c_irq_wake NULL
 #define s3c_irqext_wake NULL
+#define s3c_cpu_resume NULL
 #endif
 
 /* PM debug functions */
diff --git a/arch/arm/plat-samsung/init.c b/arch/arm/plat-samsung/init.c
index 3e5c461..50a3ea0 100644
--- a/arch/arm/plat-samsung/init.c
+++ b/arch/arm/plat-samsung/init.c
@@ -55,12 +55,13 @@
 
 	printk("CPU %s (id 0x%08lx)\n", cpu->name, idcode);
 
-	if (cpu->map_io == NULL || cpu->init == NULL) {
+	if (cpu->init == NULL) {
 		printk(KERN_ERR "CPU %s support not enabled\n", cpu->name);
 		panic("Unsupported Samsung CPU");
 	}
 
-	cpu->map_io();
+	if (cpu->map_io)
+		cpu->map_io();
 }
 
 /* s3c24xx_init_clocks
diff --git a/arch/arm/plat-samsung/pm.c b/arch/arm/plat-samsung/pm.c
index ea36136..d0c2301 100644
--- a/arch/arm/plat-samsung/pm.c
+++ b/arch/arm/plat-samsung/pm.c
@@ -80,7 +80,7 @@
 
 #ifdef CONFIG_SAMSUNG_PM_DEBUG
 
-static struct pm_uart_save uart_save[CONFIG_SERIAL_SAMSUNG_UARTS];
+static struct pm_uart_save uart_save;
 
 static void s3c_pm_save_uart(unsigned int uart, struct pm_uart_save *save)
 {
@@ -101,11 +101,7 @@
 
 static void s3c_pm_save_uarts(void)
 {
-	struct pm_uart_save *save = uart_save;
-	unsigned int uart;
-
-	for (uart = 0; uart < CONFIG_SERIAL_SAMSUNG_UARTS; uart++, save++)
-		s3c_pm_save_uart(uart, save);
+	s3c_pm_save_uart(CONFIG_DEBUG_S3C_UART, &uart_save);
 }
 
 static void s3c_pm_restore_uart(unsigned int uart, struct pm_uart_save *save)
@@ -126,11 +122,7 @@
 
 static void s3c_pm_restore_uarts(void)
 {
-	struct pm_uart_save *save = uart_save;
-	unsigned int uart;
-
-	for (uart = 0; uart < CONFIG_SERIAL_SAMSUNG_UARTS; uart++, save++)
-		s3c_pm_restore_uart(uart, save);
+	s3c_pm_restore_uart(CONFIG_DEBUG_S3C_UART, &uart_save);
 }
 #else
 static void s3c_pm_save_uarts(void) { }
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index f71c37e..8a6295c 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -170,9 +170,10 @@
 	per_cpu(xen_vcpu, cpu) = vcpup;
 
 	enable_percpu_irq(xen_events_irq, 0);
+	put_cpu();
 }
 
-static void xen_restart(char str, const char *cmd)
+static void xen_restart(enum reboot_mode reboot_mode, const char *cmd)
 {
 	struct sched_shutdown r = { .reason = SHUTDOWN_reboot };
 	int rc;
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index c92de41..b25763b 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -42,14 +42,15 @@
 #define	TPIDR_EL1	18	/* Thread ID, Privileged */
 #define	AMAIR_EL1	19	/* Aux Memory Attribute Indirection Register */
 #define	CNTKCTL_EL1	20	/* Timer Control Register (EL1) */
+#define	PAR_EL1		21	/* Physical Address Register */
 /* 32bit specific registers. Keep them at the end of the range */
-#define	DACR32_EL2	21	/* Domain Access Control Register */
-#define	IFSR32_EL2	22	/* Instruction Fault Status Register */
-#define	FPEXC32_EL2	23	/* Floating-Point Exception Control Register */
-#define	DBGVCR32_EL2	24	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	25	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	26	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	27
+#define	DACR32_EL2	22	/* Domain Access Control Register */
+#define	IFSR32_EL2	23	/* Instruction Fault Status Register */
+#define	FPEXC32_EL2	24	/* Floating-Point Exception Control Register */
+#define	DBGVCR32_EL2	25	/* Debug Vector Catch Register */
+#define	TEECR32_EL1	26	/* ThumbEE Configuration Register */
+#define	TEEHBR32_EL1	27	/* ThumbEE Handler Base Register */
+#define	NR_SYS_REGS	28
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
@@ -69,6 +70,8 @@
 #define c5_AIFSR	(AFSR1_EL1 * 2)	/* Auxiliary Instr Fault Status R */
 #define c6_DFAR		(FAR_EL1 * 2)	/* Data Fault Address Register */
 #define c6_IFAR		(c6_DFAR + 1)	/* Instruction Fault Address Register */
+#define c7_PAR		(PAR_EL1 * 2)	/* Physical Address Register */
+#define c7_PAR_high	(c7_PAR + 1)	/* PAR top 32 bits */
 #define c10_PRRR	(MAIR_EL1 * 2)	/* Primary Region Remap Register */
 #define c10_NMRR	(c10_PRRR + 1)	/* Normal Memory Remap Register */
 #define c12_VBAR	(VBAR_EL1 * 2)	/* Vector Base Address Register */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 644d739..0859a4d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -129,7 +129,7 @@
 	struct kvm_mmu_memory_cache mmu_page_cache;
 
 	/* Target CPU and feature flags */
-	u32 target;
+	int target;
 	DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
 
 	/* Detect first run of a vcpu */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 3659e46..23a3c47 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -24,10 +24,10 @@
 #include <linux/compiler.h>
 
 #ifndef CONFIG_ARM64_64K_PAGES
-#define THREAD_SIZE_ORDER	1
+#define THREAD_SIZE_ORDER	2
 #endif
 
-#define THREAD_SIZE		8192
+#define THREAD_SIZE		16384
 #define THREAD_START_SP		(THREAD_SIZE - 16)
 
 #ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 46b3beb..717031a 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -35,6 +35,7 @@
 	struct mm_struct	*mm;
 	unsigned int		fullmm;
 	struct vm_area_struct	*vma;
+	unsigned long		start, end;
 	unsigned long		range_start;
 	unsigned long		range_end;
 	unsigned int		nr;
@@ -97,10 +98,12 @@
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int fullmm)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = fullmm;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->vma = NULL;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 4398272..26e310c 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -21,6 +21,7 @@
 #define BOOT_CPU_MODE_EL2	(0x0e12b007)
 
 #ifndef __ASSEMBLY__
+#include <asm/cacheflush.h>
 
 /*
  * __boot_cpu_mode records what mode CPUs were booted in.
@@ -36,9 +37,20 @@
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
 phys_addr_t __hyp_get_vectors(void);
 
+static inline void sync_boot_mode(void)
+{
+	/*
+	 * As secondaries write to __boot_cpu_mode with caches disabled, we
+	 * must flush the corresponding cache entries to ensure the visibility
+	 * of their writes.
+	 */
+	__flush_dcache_area(__boot_cpu_mode, sizeof(__boot_cpu_mode));
+}
+
 /* Reports the availability of HYP mode */
 static inline bool is_hyp_mode_available(void)
 {
+	sync_boot_mode();
 	return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 &&
 		__boot_cpu_mode[1] == BOOT_CPU_MODE_EL2);
 }
@@ -46,6 +58,7 @@
 /* Check if the bootloader has booted CPUs in different modes */
 static inline bool is_hyp_mode_mismatched(void)
 {
+	sync_boot_mode();
 	return __boot_cpu_mode[0] != __boot_cpu_mode[1];
 }
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1d13142..6ad781b 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -121,7 +121,7 @@
 
 	.macro	get_thread_info, rd
 	mov	\rd, sp
-	and	\rd, \rd, #~((1 << 13) - 1)	// top of 8K stack
+	and	\rd, \rd, #~(THREAD_SIZE - 1)	// top of stack
 	.endm
 
 /*
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 9ba33c4..12e6ccb 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -107,7 +107,12 @@
 static int
 armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
 {
-	int mapping = (*event_map)[config];
+	int mapping;
+
+	if (config >= PERF_COUNT_HW_MAX)
+		return -EINVAL;
+
+	mapping = (*event_map)[config];
 	return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
 }
 
@@ -317,6 +322,9 @@
 	struct hw_perf_event fake_event = event->hw;
 	struct pmu *leader_pmu = event->group_leader->pmu;
 
+	if (is_software_event(event))
+		return 1;
+
 	if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
 		return 1;
 
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 1788bf6..57fb55c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -81,7 +81,7 @@
 void (*pm_power_off)(void);
 EXPORT_SYMBOL_GPL(pm_power_off);
 
-void (*arm_pm_restart)(char str, const char *cmd);
+void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
 EXPORT_SYMBOL_GPL(arm_pm_restart);
 
 void arch_cpu_idle_prepare(void)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index ff985e3..1ac0bbb 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -214,6 +214,7 @@
 	mrs	x21,	tpidr_el1
 	mrs	x22, 	amair_el1
 	mrs	x23, 	cntkctl_el1
+	mrs	x24,	par_el1
 
 	stp	x4, x5, [x3]
 	stp	x6, x7, [x3, #16]
@@ -225,6 +226,7 @@
 	stp	x18, x19, [x3, #112]
 	stp	x20, x21, [x3, #128]
 	stp	x22, x23, [x3, #144]
+	str	x24, [x3, #160]
 .endm
 
 .macro restore_sysregs
@@ -243,6 +245,7 @@
 	ldp	x18, x19, [x3, #112]
 	ldp	x20, x21, [x3, #128]
 	ldp	x22, x23, [x3, #144]
+	ldr	x24, [x3, #160]
 
 	msr	vmpidr_el2,	x4
 	msr	csselr_el1,	x5
@@ -264,6 +267,7 @@
 	msr	tpidr_el1,	x21
 	msr	amair_el1,	x22
 	msr	cntkctl_el1,	x23
+	msr	par_el1,	x24
 .endm
 
 .macro skip_32bit_state tmp, target
@@ -600,6 +604,8 @@
 
 // void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
 ENTRY(__kvm_tlb_flush_vmid_ipa)
+	dsb	ishst
+
 	kern_hyp_va	x0
 	ldr	x2, [x0, #KVM_VTTBR]
 	msr	vttbr_el2, x2
@@ -621,6 +627,7 @@
 ENDPROC(__kvm_tlb_flush_vmid_ipa)
 
 ENTRY(__kvm_flush_vm_context)
+	dsb	ishst
 	tlbi	alle1is
 	ic	ialluis
 	dsb	sy
@@ -753,6 +760,10 @@
 	 */
 	tbnz	x1, #7, 1f	// S1PTW is set
 
+	/* Preserve PAR_EL1 */
+	mrs	x3, par_el1
+	push	x3, xzr
+
 	/*
 	 * Permission fault, HPFAR_EL2 is invalid.
 	 * Resolve the IPA the hard way using the guest VA.
@@ -766,6 +777,8 @@
 
 	/* Read result */
 	mrs	x3, par_el1
+	pop	x0, xzr			// Restore PAR_EL1 from the stack
+	msr	par_el1, x0
 	tbnz	x3, #0, 3f		// Bail out if we failed the translation
 	ubfx	x3, x3, #12, #36	// Extract IPA
 	lsl	x3, x3, #4		// and present it like HPFAR
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 9492360..02e9d09 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -211,6 +211,9 @@
 	/* FAR_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b0110), CRm(0b0000), Op2(0b000),
 	  NULL, reset_unknown, FAR_EL1 },
+	/* PAR_EL1 */
+	{ Op0(0b11), Op1(0b000), CRn(0b0111), CRm(0b0100), Op2(0b000),
+	  NULL, reset_unknown, PAR_EL1 },
 
 	/* PMINTENSET_EL1 */
 	{ Op0(0b11), Op1(0b000), CRn(0b1001), CRm(0b1110), Op2(0b001),
diff --git a/arch/avr32/boards/atngw100/mrmt.c b/arch/avr32/boards/atngw100/mrmt.c
index f914319..7de083d 100644
--- a/arch/avr32/boards/atngw100/mrmt.c
+++ b/arch/avr32/boards/atngw100/mrmt.c
@@ -150,7 +150,6 @@
 static struct platform_device rmt_ts_device = {
 	.name	= "ucb1400_ts",
 	.id	= -1,
-	}
 };
 #endif
 
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 33a9792..77d442a 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -158,6 +158,7 @@
 endmenu
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 source "drivers/Kconfig"
 source "fs/Kconfig"
 
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
index 7913695..efbd292 100644
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -31,7 +31,7 @@
 CONFIG_ACPI_DOCK=y
 CONFIG_ACPI_PROCESSOR=m
 CONFIG_ACPI_CONTAINER=m
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_ACPI=m
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig
index f8e9133..f64980d 100644
--- a/arch/ia64/configs/gensparse_defconfig
+++ b/arch/ia64/configs/gensparse_defconfig
@@ -25,7 +25,7 @@
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
 CONFIG_ACPI_CONTAINER=m
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_ACPI=m
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index a5a9e02..0f4e9e4 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -31,7 +31,7 @@
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
 CONFIG_ACPI_CONTAINER=m
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_ACPI=m
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/ia64/configs/xen_domu_defconfig b/arch/ia64/configs/xen_domu_defconfig
index 37b9b42..b025acf 100644
--- a/arch/ia64/configs/xen_domu_defconfig
+++ b/arch/ia64/configs/xen_domu_defconfig
@@ -32,7 +32,7 @@
 CONFIG_ACPI_FAN=m
 CONFIG_ACPI_PROCESSOR=m
 CONFIG_ACPI_CONTAINER=m
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_ACPI=m
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index ef3a9de..bc5efc7 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -22,7 +22,7 @@
  * unmapping a portion of the virtual address space, these hooks are called according to
  * the following template:
  *
- *	tlb <- tlb_gather_mmu(mm, full_mm_flush);	// start unmap for address space MM
+ *	tlb <- tlb_gather_mmu(mm, start, end);		// start unmap for address space MM
  *	{
  *	  for each vma that needs a shootdown do {
  *	    tlb_start_vma(tlb, vma);
@@ -58,6 +58,7 @@
 	unsigned int		max;
 	unsigned char		fullmm;		/* non-zero means full mm flush */
 	unsigned char		need_flush;	/* really unmapped some PTEs? */
+	unsigned long		start, end;
 	unsigned long		start_addr;
 	unsigned long		end_addr;
 	struct page		**pages;
@@ -155,13 +156,15 @@
 
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 	tlb->max = ARRAY_SIZE(tlb->local);
 	tlb->pages = tlb->local;
 	tlb->nr = 0;
-	tlb->fullmm = full_mm_flush;
+	tlb->fullmm = !(start | (end+1));
+	tlb->start = start;
+	tlb->end = end;
 	tlb->start_addr = ~0UL;
 }
 
diff --git a/arch/m68k/emu/natfeat.c b/arch/m68k/emu/natfeat.c
index 2291a7d..fa277ae 100644
--- a/arch/m68k/emu/natfeat.c
+++ b/arch/m68k/emu/natfeat.c
@@ -18,9 +18,11 @@
 #include <asm/machdep.h>
 #include <asm/natfeat.h>
 
+extern long nf_get_id2(const char *feature_name);
+
 asm("\n"
-"	.global nf_get_id,nf_call\n"
-"nf_get_id:\n"
+"	.global nf_get_id2,nf_call\n"
+"nf_get_id2:\n"
 "	.short	0x7300\n"
 "	rts\n"
 "nf_call:\n"
@@ -29,12 +31,25 @@
 "1:	moveq.l	#0,%d0\n"
 "	rts\n"
 "	.section __ex_table,\"a\"\n"
-"	.long	nf_get_id,1b\n"
+"	.long	nf_get_id2,1b\n"
 "	.long	nf_call,1b\n"
 "	.previous");
-EXPORT_SYMBOL_GPL(nf_get_id);
 EXPORT_SYMBOL_GPL(nf_call);
 
+long nf_get_id(const char *feature_name)
+{
+	/* feature_name may be in vmalloc()ed memory, so make a copy */
+	char name_copy[32];
+	size_t n;
+
+	n = strlcpy(name_copy, feature_name, sizeof(name_copy));
+	if (n >= sizeof(name_copy))
+		return 0;
+
+	return nf_get_id2(name_copy);
+}
+EXPORT_SYMBOL_GPL(nf_get_id);
+
 void nfprint(const char *fmt, ...)
 {
 	static char buf[256];
diff --git a/arch/m68k/include/asm/div64.h b/arch/m68k/include/asm/div64.h
index 444ea8a..ef881cf 100644
--- a/arch/m68k/include/asm/div64.h
+++ b/arch/m68k/include/asm/div64.h
@@ -15,16 +15,17 @@
 		unsigned long long n64;				\
 	} __n;							\
 	unsigned long __rem, __upper;				\
+	unsigned long __base = (base);				\
 								\
 	__n.n64 = (n);						\
 	if ((__upper = __n.n32[0])) {				\
 		asm ("divul.l %2,%1:%0"				\
-			: "=d" (__n.n32[0]), "=d" (__upper)	\
-			: "d" (base), "0" (__n.n32[0]));	\
+		     : "=d" (__n.n32[0]), "=d" (__upper)	\
+		     : "d" (__base), "0" (__n.n32[0]));		\
 	}							\
 	asm ("divu.l %2,%1:%0"					\
-		: "=d" (__n.n32[1]), "=d" (__rem)		\
-		: "d" (base), "1" (__upper), "0" (__n.n32[1]));	\
+	     : "=d" (__n.n32[1]), "=d" (__rem)			\
+	     : "d" (__base), "1" (__upper), "0" (__n.n32[1]));	\
 	(n) = __n.n64;						\
 	__rem;							\
 })
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index d22a4ec..4fab522 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -28,7 +28,7 @@
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IDLE_POLL_SETUP
 	select MODULES_USE_ELF_RELA
-	select CLONE_BACKWARDS
+	select CLONE_BACKWARDS3
 
 config SWAP
 	def_bool n
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index c3abed3..e12764c 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -114,6 +114,7 @@
 	select FW_CFE
 	select HW_HAS_PCI
 	select IRQ_CPU
+	select SYS_HAS_CPU_MIPS32_R1
 	select NO_EXCEPT_FILL
 	select SYS_SUPPORTS_32BIT_KERNEL
 	select SYS_SUPPORTS_LITTLE_ENDIAN
diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig
index ba61192..2b8b118 100644
--- a/arch/mips/bcm47xx/Kconfig
+++ b/arch/mips/bcm47xx/Kconfig
@@ -2,7 +2,6 @@
 
 config BCM47XX_SSB
 	bool "SSB Support for Broadcom BCM47XX"
-	select SYS_HAS_CPU_MIPS32_R1
 	select SSB
 	select SSB_DRIVER_MIPS
 	select SSB_DRIVER_EXTIF
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index 1dc0860..fa44f3e 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -17,6 +17,8 @@
 #define current_cpu_type()	current_cpu_data.cputype
 #endif
 
+#define boot_cpu_type()		cpu_data[0].cputype
+
 /*
  * SMP assumption: Options of CPU 0 are a superset of all processors.
  * This is true for all known MIPS systems.
diff --git a/arch/mips/include/asm/mach-generic/spaces.h b/arch/mips/include/asm/mach-generic/spaces.h
index 5b2f2e6..9488fa5 100644
--- a/arch/mips/include/asm/mach-generic/spaces.h
+++ b/arch/mips/include/asm/mach-generic/spaces.h
@@ -25,8 +25,12 @@
 #else
 #define CAC_BASE		_AC(0x80000000, UL)
 #endif
+#ifndef IO_BASE
 #define IO_BASE			_AC(0xa0000000, UL)
+#endif
+#ifndef UNCAC_BASE
 #define UNCAC_BASE		_AC(0xa0000000, UL)
+#endif
 
 #ifndef MAP_BASE
 #ifdef CONFIG_KVM_GUEST
diff --git a/arch/mips/include/uapi/asm/siginfo.h b/arch/mips/include/uapi/asm/siginfo.h
index b7a2306..88e292b 100644
--- a/arch/mips/include/uapi/asm/siginfo.h
+++ b/arch/mips/include/uapi/asm/siginfo.h
@@ -25,11 +25,12 @@
 /*
  * Careful to keep union _sifields from shifting ...
  */
-#if __SIZEOF_LONG__ == 4
+#if _MIPS_SZLONG == 32
 #define __ARCH_SI_PREAMBLE_SIZE (3 * sizeof(int))
-#endif
-#if __SIZEOF_LONG__ == 8
+#elif _MIPS_SZLONG == 64
 #define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
+#else
+#error _MIPS_SZLONG neither 32 nor 64
 #endif
 
 #include <asm-generic/siginfo.h>
diff --git a/arch/mips/kernel/bmips_vec.S b/arch/mips/kernel/bmips_vec.S
index f739aed..bd79c4f 100644
--- a/arch/mips/kernel/bmips_vec.S
+++ b/arch/mips/kernel/bmips_vec.S
@@ -54,7 +54,11 @@
 	/* set up CPU1 CBR; move BASE to 0xa000_0000 */
 	li	k0, 0xff400000
 	mtc0	k0, $22, 6
-	li	k1, CKSEG1 | BMIPS_RELO_VECTOR_CONTROL_1
+	/* set up relocation vector address based on thread ID */
+	mfc0	k1, $22, 3
+	srl	k1, 16
+	andi	k1, 0x8000
+	or	k1, CKSEG1 | BMIPS_RELO_VECTOR_CONTROL_0
 	or	k0, k1
 	li	k1, 0xa0080000
 	sw	k1, 0(k0)
diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c
index c0bb4d5..126da74 100644
--- a/arch/mips/kernel/smp-bmips.c
+++ b/arch/mips/kernel/smp-bmips.c
@@ -66,6 +66,8 @@
 	int i, cpu = 1, boot_cpu = 0;
 
 #if defined(CONFIG_CPU_BMIPS4350) || defined(CONFIG_CPU_BMIPS4380)
+	int cpu_hw_intr;
+
 	/* arbitration priority */
 	clear_c0_brcm_cmt_ctrl(0x30);
 
@@ -79,15 +81,13 @@
 	 * MIPS interrupts 0,1 (SW INT 0,1) cross over to the other thread
 	 * MIPS interrupt 2 (HW INT 0) is the CPU0 L1 controller output
 	 * MIPS interrupt 3 (HW INT 1) is the CPU1 L1 controller output
-	 *
-	 * If booting from TP1, leave the existing CMT interrupt routing
-	 * such that TP0 responds to SW1 and TP1 responds to SW0.
 	 */
 	if (boot_cpu == 0)
-		change_c0_brcm_cmt_intr(0xf8018000,
-					(0x02 << 27) | (0x03 << 15));
+		cpu_hw_intr = 0x02;
 	else
-		change_c0_brcm_cmt_intr(0xf8018000, (0x1d << 27));
+		cpu_hw_intr = 0x1d;
+
+	change_c0_brcm_cmt_intr(0xf8018000, (cpu_hw_intr << 27) | (0x03 << 15));
 
 	/* single core, 2 threads (2 pipelines) */
 	max_cpus = 2;
@@ -202,9 +202,15 @@
 #if defined(CONFIG_CPU_BMIPS4350) || defined(CONFIG_CPU_BMIPS4380)
 	void __iomem *cbr = BMIPS_GET_CBR();
 	unsigned long old_vec;
+	unsigned long relo_vector;
+	int boot_cpu;
 
-	old_vec = __raw_readl(cbr + BMIPS_RELO_VECTOR_CONTROL_1);
-	__raw_writel(old_vec & ~0x20000000, cbr + BMIPS_RELO_VECTOR_CONTROL_1);
+	boot_cpu = !!(read_c0_brcm_cmt_local() & (1 << 31));
+	relo_vector = boot_cpu ? BMIPS_RELO_VECTOR_CONTROL_0 :
+			  BMIPS_RELO_VECTOR_CONTROL_1;
+
+	old_vec = __raw_readl(cbr + relo_vector);
+	__raw_writel(old_vec & ~0x20000000, cbr + relo_vector);
 
 	clear_c0_cause(smp_processor_id() ? C_SW1 : C_SW0);
 #elif defined(CONFIG_CPU_BMIPS5000)
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index e773659..46048d2 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -803,6 +803,32 @@
 				dec_insn.next_pc_inc;
 		return 1;
 		break;
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+	case lwc2_op: /* This is bbit0 on Octeon */
+		if ((regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) == 0)
+			*contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2);
+		else
+			*contpc = regs->cp0_epc + 8;
+		return 1;
+	case ldc2_op: /* This is bbit032 on Octeon */
+		if ((regs->regs[insn.i_format.rs] & (1ull<<(insn.i_format.rt + 32))) == 0)
+			*contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2);
+		else
+			*contpc = regs->cp0_epc + 8;
+		return 1;
+	case swc2_op: /* This is bbit1 on Octeon */
+		if (regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt))
+			*contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2);
+		else
+			*contpc = regs->cp0_epc + 8;
+		return 1;
+	case sdc2_op: /* This is bbit132 on Octeon */
+		if (regs->regs[insn.i_format.rs] & (1ull<<(insn.i_format.rt + 32)))
+			*contpc = regs->cp0_epc + 4 + (insn.i_format.simmediate << 2);
+		else
+			*contpc = regs->cp0_epc + 8;
+		return 1;
+#endif
 	case cop0_op:
 	case cop1_op:
 	case cop2_op:
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index e4b1140..3a2b6e9 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -166,7 +166,7 @@
 			reg.control[i] |= M_PERFCTL_USER;
 		if (ctr[i].exl)
 			reg.control[i] |= M_PERFCTL_EXL;
-		if (current_cpu_type() == CPU_XLR)
+		if (boot_cpu_type() == CPU_XLR)
 			reg.control[i] |= M_PERFCTL_COUNT_ALL_THREADS;
 		reg.counter[i] = 0x80000000 - ctr[i].count;
 	}
diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c
index d22dc0d..2b7e837 100644
--- a/arch/mips/pnx833x/common/platform.c
+++ b/arch/mips/pnx833x/common/platform.c
@@ -206,11 +206,13 @@
 		.end   = PNX8335_IP3902_PORTS_END,
 		.flags = IORESOURCE_MEM,
 	},
+#ifdef CONFIG_SOC_PNX8335
 	[1] = {
 		.start = PNX8335_PIC_ETHERNET_INT,
 		.end   = PNX8335_PIC_ETHERNET_INT,
 		.flags = IORESOURCE_IRQ,
 	},
+#endif
 };
 
 static struct platform_device pnx833x_ethernet_device = {
diff --git a/arch/mips/powertv/asic/asic_devices.c b/arch/mips/powertv/asic/asic_devices.c
index 9f64c23..0238af1 100644
--- a/arch/mips/powertv/asic/asic_devices.c
+++ b/arch/mips/powertv/asic/asic_devices.c
@@ -529,8 +529,7 @@
  */
 void platform_release_memory(void *ptr, int size)
 {
-	free_reserved_area((unsigned long)ptr, (unsigned long)(ptr + size),
-			   -1, NULL);
+	free_reserved_area(ptr, ptr + size, -1, NULL);
 }
 EXPORT_SYMBOL(platform_release_memory);
 
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 99dbab1..d60bf98 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -55,6 +55,7 @@
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
 
diff --git a/arch/parisc/configs/c8000_defconfig b/arch/parisc/configs/c8000_defconfig
new file mode 100644
index 0000000..f110063
--- /dev/null
+++ b/arch/parisc/configs/c8000_defconfig
@@ -0,0 +1,279 @@
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_RELAY=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
+CONFIG_RD_LZO=y
+CONFIG_EXPERT=y
+CONFIG_SYSCTL_SYSCALL=y
+CONFIG_SLAB=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_PA8X00=y
+CONFIG_MLONGCALLS=y
+CONFIG_64BIT=y
+CONFIG_SMP=y
+CONFIG_PREEMPT=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_IOMMU_CCIO=y
+CONFIG_PCI=y
+CONFIG_PCI_LBA=y
+# CONFIG_SUPERIO is not set
+# CONFIG_CHASSIS_LCD_LED is not set
+# CONFIG_PDC_CHASSIS is not set
+# CONFIG_PDC_CHASSIS_WARN is not set
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=m
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_XFRM_USER=m
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_NET_KEY=m
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_NET_IPIP=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_SYN_COOKIES=y
+CONFIG_INET_AH=m
+CONFIG_INET_ESP=m
+CONFIG_INET_IPCOMP=m
+CONFIG_INET_XFRM_MODE_BEET=m
+CONFIG_INET_DIAG=m
+# CONFIG_IPV6 is not set
+CONFIG_IP_DCCP=m
+# CONFIG_IP_DCCP_CCID3 is not set
+CONFIG_TIPC=m
+CONFIG_LLC2=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_STANDALONE is not set
+CONFIG_PARPORT=y
+CONFIG_PARPORT_PC=y
+CONFIG_PARPORT_PC_FIFO=y
+CONFIG_BLK_DEV_UMEM=m
+CONFIG_BLK_DEV_LOOP=m
+CONFIG_BLK_DEV_CRYPTOLOOP=m
+CONFIG_BLK_DEV_SX8=m
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=6144
+CONFIG_CDROM_PKTCDVD=m
+CONFIG_CDROM_PKTCDVD_WCACHE=y
+CONFIG_ATA_OVER_ETH=m
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDECD=y
+CONFIG_BLK_DEV_PLATFORM=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_SIIMAGE=y
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_SCH=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SAS_LIBSAS=m
+CONFIG_ISCSI_TCP=m
+CONFIG_ISCSI_BOOT_SYSFS=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=y
+CONFIG_FUSION_SAS=y
+CONFIG_NETDEVICES=y
+CONFIG_DUMMY=m
+CONFIG_NETCONSOLE=m
+CONFIG_TUN=y
+CONFIG_E1000=y
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_MPPE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+# CONFIG_WLAN is not set
+CONFIG_INPUT_FF_MEMLESS=m
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_KEYBOARD_HIL_OLD is not set
+# CONFIG_KEYBOARD_HIL is not set
+CONFIG_MOUSE_PS2=m
+CONFIG_INPUT_MISC=y
+CONFIG_INPUT_CM109=m
+CONFIG_SERIO_SERPORT=m
+CONFIG_SERIO_PARKBD=m
+CONFIG_SERIO_GSCPS2=m
+# CONFIG_HP_SDC is not set
+CONFIG_SERIO_PCIPS2=m
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SERIO_RAW=m
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=8
+CONFIG_SERIAL_8250_RUNTIME_UARTS=8
+CONFIG_SERIAL_8250_EXTENDED=y
+# CONFIG_SERIAL_MUX is not set
+CONFIG_SERIAL_JSM=m
+CONFIG_PRINTER=y
+CONFIG_HW_RANDOM=y
+CONFIG_RAW_DRIVER=m
+CONFIG_PTP_1588_CLOCK=y
+CONFIG_SSB=m
+CONFIG_SSB_DRIVER_PCICORE=y
+CONFIG_AGP=y
+CONFIG_AGP_PARISC=y
+CONFIG_DRM=y
+CONFIG_DRM_RADEON=y
+CONFIG_FIRMWARE_EDID=y
+CONFIG_FB_FOREIGN_ENDIAN=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+# CONFIG_FB_STI is not set
+CONFIG_BACKLIGHT_LCD_SUPPORT=y
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_GENERIC is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_STI_CONSOLE is not set
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+# CONFIG_LOGO_LINUX_CLUT224 is not set
+CONFIG_SOUND=m
+CONFIG_SND=m
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_MIXER_OSS=m
+CONFIG_SND_PCM_OSS=m
+CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_VERBOSE_PRINTK=y
+CONFIG_SND_AD1889=m
+# CONFIG_SND_USB is not set
+# CONFIG_SND_GSC is not set
+CONFIG_HID_A4TECH=m
+CONFIG_HID_APPLE=m
+CONFIG_HID_BELKIN=m
+CONFIG_HID_CHERRY=m
+CONFIG_HID_CHICONY=m
+CONFIG_HID_CYPRESS=m
+CONFIG_HID_DRAGONRISE=m
+CONFIG_HID_EZKEY=m
+CONFIG_HID_KYE=m
+CONFIG_HID_GYRATION=m
+CONFIG_HID_TWINHAN=m
+CONFIG_HID_KENSINGTON=m
+CONFIG_HID_LOGITECH=m
+CONFIG_HID_LOGITECH_DJ=m
+CONFIG_HID_MICROSOFT=m
+CONFIG_HID_MONTEREY=m
+CONFIG_HID_NTRIG=m
+CONFIG_HID_ORTEK=m
+CONFIG_HID_PANTHERLORD=m
+CONFIG_HID_PETALYNX=m
+CONFIG_HID_SAMSUNG=m
+CONFIG_HID_SUNPLUS=m
+CONFIG_HID_GREENASIA=m
+CONFIG_HID_SMARTJOYPLUS=m
+CONFIG_HID_TOPSEED=m
+CONFIG_HID_THRUSTMASTER=m
+CONFIG_HID_ZEROPLUS=m
+CONFIG_USB_HID=m
+CONFIG_USB=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT2_FS_XATTR=y
+CONFIG_EXT2_FS_POSIX_ACL=y
+CONFIG_EXT2_FS_SECURITY=y
+CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=m
+CONFIG_REISERFS_FS=m
+CONFIG_REISERFS_PROC_INFO=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_QUOTA=y
+CONFIG_QFMT_V1=m
+CONFIG_QFMT_V2=m
+CONFIG_AUTOFS4_FS=m
+CONFIG_FUSE_FS=m
+CONFIG_ISO9660_FS=y
+CONFIG_JOLIET=y
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_NFS_FS=m
+CONFIG_NLS_CODEPAGE_437=m
+CONFIG_NLS_CODEPAGE_737=m
+CONFIG_NLS_CODEPAGE_775=m
+CONFIG_NLS_CODEPAGE_850=m
+CONFIG_NLS_CODEPAGE_852=m
+CONFIG_NLS_CODEPAGE_855=m
+CONFIG_NLS_CODEPAGE_857=m
+CONFIG_NLS_CODEPAGE_860=m
+CONFIG_NLS_CODEPAGE_861=m
+CONFIG_NLS_CODEPAGE_862=m
+CONFIG_NLS_CODEPAGE_863=m
+CONFIG_NLS_CODEPAGE_864=m
+CONFIG_NLS_CODEPAGE_865=m
+CONFIG_NLS_CODEPAGE_866=m
+CONFIG_NLS_CODEPAGE_869=m
+CONFIG_NLS_CODEPAGE_936=m
+CONFIG_NLS_CODEPAGE_950=m
+CONFIG_NLS_CODEPAGE_932=m
+CONFIG_NLS_CODEPAGE_949=m
+CONFIG_NLS_CODEPAGE_874=m
+CONFIG_NLS_ISO8859_8=m
+CONFIG_NLS_CODEPAGE_1250=m
+CONFIG_NLS_CODEPAGE_1251=m
+CONFIG_NLS_ASCII=m
+CONFIG_NLS_ISO8859_1=m
+CONFIG_NLS_ISO8859_2=m
+CONFIG_NLS_ISO8859_3=m
+CONFIG_NLS_ISO8859_4=m
+CONFIG_NLS_ISO8859_5=m
+CONFIG_NLS_ISO8859_6=m
+CONFIG_NLS_ISO8859_7=m
+CONFIG_NLS_ISO8859_9=m
+CONFIG_NLS_ISO8859_13=m
+CONFIG_NLS_ISO8859_14=m
+CONFIG_NLS_ISO8859_15=m
+CONFIG_NLS_KOI8_R=m
+CONFIG_NLS_KOI8_U=m
+CONFIG_NLS_UTF8=m
+CONFIG_UNUSED_SYMBOLS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_SLAB=y
+CONFIG_DEBUG_SLAB_LEAK=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_RT_MUTEX_TESTER=y
+CONFIG_PROVE_RCU_DELAY=y
+CONFIG_DEBUG_BLOCK_EXT_DEVT=y
+CONFIG_LATENCYTOP=y
+CONFIG_DEBUG_STRICT_USER_COPY_CHECKS=y
+CONFIG_KEYS=y
+# CONFIG_CRYPTO_HW is not set
+CONFIG_FONTS=y
diff --git a/arch/parisc/include/asm/parisc-device.h b/arch/parisc/include/asm/parisc-device.h
index 9afdad6..eaf4dc1 100644
--- a/arch/parisc/include/asm/parisc-device.h
+++ b/arch/parisc/include/asm/parisc-device.h
@@ -23,6 +23,7 @@
 	/* generic info returned from pdc_pat_cell_module() */
 	unsigned long	mod_info;	/* PAT specific - Misc Module info */
 	unsigned long	pmod_loc;	/* physical Module location */
+	unsigned long	mod0;
 #endif
 	u64		dma_mask;	/* DMA mask for I/O */
 	struct device 	dev;
@@ -61,4 +62,6 @@
 
 extern struct bus_type parisc_bus_type;
 
+int iosapic_serial_irq(struct parisc_device *dev);
+
 #endif /*_ASM_PARISC_PARISC_DEVICE_H_*/
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 2e65aa5..c035673 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -71,18 +71,27 @@
 }
 EXPORT_SYMBOL(flush_cache_all_local);
 
+/* Virtual address of pfn.  */
+#define pfn_va(pfn)	__va(PFN_PHYS(pfn))
+
 void
 update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
 {
-	struct page *page = pte_page(*ptep);
+	unsigned long pfn = pte_pfn(*ptep);
+	struct page *page;
 
-	if (pfn_valid(page_to_pfn(page)) && page_mapping(page) &&
-	    test_bit(PG_dcache_dirty, &page->flags)) {
+	/* We don't have pte special.  As a result, we can be called with
+	   an invalid pfn and we don't need to flush the kernel dcache page.
+	   This occurs with FireGL card in C8000.  */
+	if (!pfn_valid(pfn))
+		return;
 
-		flush_kernel_dcache_page(page);
+	page = pfn_to_page(pfn);
+	if (page_mapping(page) && test_bit(PG_dcache_dirty, &page->flags)) {
+		flush_kernel_dcache_page_addr(pfn_va(pfn));
 		clear_bit(PG_dcache_dirty, &page->flags);
 	} else if (parisc_requires_coherency())
-		flush_kernel_dcache_page(page);
+		flush_kernel_dcache_page_addr(pfn_va(pfn));
 }
 
 void
@@ -495,44 +504,42 @@
 
 void flush_cache_mm(struct mm_struct *mm)
 {
+	struct vm_area_struct *vma;
+	pgd_t *pgd;
+
 	/* Flushing the whole cache on each cpu takes forever on
 	   rp3440, etc.  So, avoid it if the mm isn't too big.  */
-	if (mm_total_size(mm) < parisc_cache_flush_threshold) {
-		struct vm_area_struct *vma;
+	if (mm_total_size(mm) >= parisc_cache_flush_threshold) {
+		flush_cache_all();
+		return;
+	}
 
-		if (mm->context == mfsp(3)) {
-			for (vma = mm->mmap; vma; vma = vma->vm_next) {
-				flush_user_dcache_range_asm(vma->vm_start,
-					vma->vm_end);
-				if (vma->vm_flags & VM_EXEC)
-					flush_user_icache_range_asm(
-					  vma->vm_start, vma->vm_end);
-			}
-		} else {
-			pgd_t *pgd = mm->pgd;
-
-			for (vma = mm->mmap; vma; vma = vma->vm_next) {
-				unsigned long addr;
-
-				for (addr = vma->vm_start; addr < vma->vm_end;
-				     addr += PAGE_SIZE) {
-					pte_t *ptep = get_ptep(pgd, addr);
-					if (ptep != NULL) {
-						pte_t pte = *ptep;
-						__flush_cache_page(vma, addr,
-						  page_to_phys(pte_page(pte)));
-					}
-				}
-			}
+	if (mm->context == mfsp(3)) {
+		for (vma = mm->mmap; vma; vma = vma->vm_next) {
+			flush_user_dcache_range_asm(vma->vm_start, vma->vm_end);
+			if ((vma->vm_flags & VM_EXEC) == 0)
+				continue;
+			flush_user_icache_range_asm(vma->vm_start, vma->vm_end);
 		}
 		return;
 	}
 
-#ifdef CONFIG_SMP
-	flush_cache_all();
-#else
-	flush_cache_all_local();
-#endif
+	pgd = mm->pgd;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long addr;
+
+		for (addr = vma->vm_start; addr < vma->vm_end;
+		     addr += PAGE_SIZE) {
+			unsigned long pfn;
+			pte_t *ptep = get_ptep(pgd, addr);
+			if (!ptep)
+				continue;
+			pfn = pte_pfn(*ptep);
+			if (!pfn_valid(pfn))
+				continue;
+			__flush_cache_page(vma, addr, PFN_PHYS(pfn));
+		}
+	}
 }
 
 void
@@ -556,33 +563,32 @@
 void flush_cache_range(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end)
 {
+	unsigned long addr;
+	pgd_t *pgd;
+
 	BUG_ON(!vma->vm_mm->context);
 
-	if ((end - start) < parisc_cache_flush_threshold) {
-		if (vma->vm_mm->context == mfsp(3)) {
-			flush_user_dcache_range_asm(start, end);
-			if (vma->vm_flags & VM_EXEC)
-				flush_user_icache_range_asm(start, end);
-		} else {
-			unsigned long addr;
-			pgd_t *pgd = vma->vm_mm->pgd;
-
-			for (addr = start & PAGE_MASK; addr < end;
-			     addr += PAGE_SIZE) {
-				pte_t *ptep = get_ptep(pgd, addr);
-				if (ptep != NULL) {
-					pte_t pte = *ptep;
-					flush_cache_page(vma,
-					   addr, pte_pfn(pte));
-				}
-			}
-		}
-	} else {
-#ifdef CONFIG_SMP
+	if ((end - start) >= parisc_cache_flush_threshold) {
 		flush_cache_all();
-#else
-		flush_cache_all_local();
-#endif
+		return;
+	}
+
+	if (vma->vm_mm->context == mfsp(3)) {
+		flush_user_dcache_range_asm(start, end);
+		if (vma->vm_flags & VM_EXEC)
+			flush_user_icache_range_asm(start, end);
+		return;
+	}
+
+	pgd = vma->vm_mm->pgd;
+	for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
+		unsigned long pfn;
+		pte_t *ptep = get_ptep(pgd, addr);
+		if (!ptep)
+			continue;
+		pfn = pte_pfn(*ptep);
+		if (pfn_valid(pfn))
+			__flush_cache_page(vma, addr, PFN_PHYS(pfn));
 	}
 }
 
@@ -591,9 +597,10 @@
 {
 	BUG_ON(!vma->vm_mm->context);
 
-	flush_tlb_page(vma, vmaddr);
-	__flush_cache_page(vma, vmaddr, page_to_phys(pfn_to_page(pfn)));
-
+	if (pfn_valid(pfn)) {
+		flush_tlb_page(vma, vmaddr);
+		__flush_cache_page(vma, vmaddr, PFN_PHYS(pfn));
+	}
 }
 
 #ifdef CONFIG_PARISC_TMPALIAS
diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c
index 3295ef4..f0b6722 100644
--- a/arch/parisc/kernel/inventory.c
+++ b/arch/parisc/kernel/inventory.c
@@ -211,6 +211,7 @@
 	/* REVISIT: who is the consumer of this? not sure yet... */
 	dev->mod_info = pa_pdc_cell->mod_info;	/* pass to PAT_GET_ENTITY() */
 	dev->pmod_loc = pa_pdc_cell->mod_location;
+	dev->mod0 = pa_pdc_cell->mod[0];
 
 	register_parisc_device(dev);	/* advertise device */
 
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 940188d..07349b0 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -56,13 +56,6 @@
 #define A(__x)	((unsigned long)(__x))
 
 /*
- * Atomically swap in the new signal mask, and wait for a signal.
- */
-#ifdef CONFIG_64BIT
-#include "sys32.h"
-#endif
-
-/*
  * Do a signal return - restore sigcontext.
  */
 
diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c
index 33eca1b..6c6a271 100644
--- a/arch/parisc/kernel/signal32.c
+++ b/arch/parisc/kernel/signal32.c
@@ -34,7 +34,6 @@
 #include <asm/uaccess.h>
 
 #include "signal32.h"
-#include "sys32.h"
 
 #define DEBUG_COMPAT_SIG 0 
 #define DEBUG_COMPAT_SIG_LEVEL 2
diff --git a/arch/parisc/kernel/sys32.h b/arch/parisc/kernel/sys32.h
deleted file mode 100644
index 60dd470..0000000
--- a/arch/parisc/kernel/sys32.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* 
- *    Copyright (C) 2002 Richard Hirst <rhirst at parisc-linux.org>
- *    Copyright (C) 2003 James Bottomley <jejb at parisc-linux.org>
- *    Copyright (C) 2003 Randolph Chung <tausq with parisc-linux.org>
- *
- *    This program is free software; you can redistribute it and/or modify
- *    it under the terms of the GNU General Public License as published by
- *    the Free Software Foundation; either version 2 of the License, or
- *    (at your option) any later version.
- *
- *    This program is distributed in the hope that it will be useful,
- *    but WITHOUT ANY WARRANTY; without even the implied warranty of
- *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *    GNU General Public License for more details.
- *
- *    You should have received a copy of the GNU General Public License
- *    along with this program; if not, write to the Free Software
- *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#ifndef _PARISC64_KERNEL_SYS32_H
-#define _PARISC64_KERNEL_SYS32_H
-
-#include <linux/compat.h>
-
-/* Call a kernel syscall which will use kernel space instead of user
- * space for its copy_to/from_user.
- */
-#define KERNEL_SYSCALL(ret, syscall, args...) \
-{ \
-    mm_segment_t old_fs = get_fs(); \
-    set_fs(KERNEL_DS); \
-    ret = syscall(args); \
-    set_fs (old_fs); \
-}
-
-#endif
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index a134ff4..bb9f3b6 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -42,8 +42,6 @@
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
 
-#include "sys32.h"
-
 #undef DEBUG
 
 #ifdef DEBUG
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3bf72cd..dbd9d3c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -566,7 +566,7 @@
 config PPC_DENORMALISATION
 	bool "PowerPC denormalisation exception handling"
 	depends on PPC_BOOK3S_64
-	default "n"
+	default "y" if PPC_POWERNV
 	---help---
 	  Add support for handling denormalisation of single precision
 	  values.  Useful for bare metal only.  If unsure say Y here.
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index c86fcb9..0e8cfd0 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -58,7 +58,7 @@
 CONFIG_PPC_DENORMALISATION=y
 CONFIG_PCCARD=y
 CONFIG_ELECTRA_CF=y
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_RPA=m
 CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
 CONFIG_PACKET=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 4b20f76..0085dc4 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -32,7 +32,7 @@
 CONFIG_SPARSEMEM_MANUAL=y
 CONFIG_PCI_MSI=y
 CONFIG_PCCARD=y
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_XFRM_USER=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index bea8587..1d4b976 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -53,7 +53,7 @@
 CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
 CONFIG_PPC_DENORMALISATION=y
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_RPA=m
 CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
 CONFIG_PACKET=y
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 09a8743..d3e5e9b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -55,6 +55,8 @@
 #define EEH_PE_RECOVERING	(1 << 1)	/* Recovering PE	*/
 #define EEH_PE_PHB_DEAD		(1 << 2)	/* Dead PHB		*/
 
+#define EEH_PE_KEEP		(1 << 8)	/* Keep PE on hotplug	*/
+
 struct eeh_pe {
 	int type;			/* PE type: PHB/Bus/Device	*/
 	int state;			/* PE EEH dependent mode	*/
@@ -72,8 +74,8 @@
 	struct list_head child;		/* Child PEs			*/
 };
 
-#define eeh_pe_for_each_dev(pe, edev) \
-		list_for_each_entry(edev, &pe->edevs, list)
+#define eeh_pe_for_each_dev(pe, edev, tmp) \
+		list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
 
 /*
  * The struct is used to trace EEH state for the associated
@@ -82,7 +84,13 @@
  * another tree except the currently existing tree of PCI
  * buses and PCI devices
  */
-#define EEH_DEV_IRQ_DISABLED	(1<<0)	/* Interrupt disabled		*/
+#define EEH_DEV_BRIDGE		(1 << 0)	/* PCI bridge		*/
+#define EEH_DEV_ROOT_PORT	(1 << 1)	/* PCIe root port	*/
+#define EEH_DEV_DS_PORT		(1 << 2)	/* Downstream port	*/
+#define EEH_DEV_IRQ_DISABLED	(1 << 3)	/* Interrupt disabled	*/
+#define EEH_DEV_DISCONNECTED	(1 << 4)	/* Removing from PE	*/
+
+#define EEH_DEV_SYSFS		(1 << 8)	/* Sysfs created        */
 
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
@@ -90,11 +98,13 @@
 	int config_addr;		/* Config address		*/
 	int pe_config_addr;		/* PE config address		*/
 	u32 config_space[16];		/* Saved PCI config space	*/
+	u8 pcie_cap;			/* Saved PCIe capability	*/
 	struct eeh_pe *pe;		/* Associated PE		*/
 	struct list_head list;		/* Form link list in the PE	*/
 	struct pci_controller *phb;	/* Associated PHB		*/
 	struct device_node *dn;		/* Associated device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
+	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
 };
 
 static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
@@ -193,8 +203,10 @@
 struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
 struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
 int eeh_add_to_parent_pe(struct eeh_dev *edev);
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe);
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
+void *eeh_pe_traverse(struct eeh_pe *root,
+		eeh_traverse_func fn, void *flag);
 void *eeh_pe_dev_traverse(struct eeh_pe *root,
 		eeh_traverse_func fn, void *flag);
 void eeh_pe_restore_bars(struct eeh_pe *pe);
@@ -209,10 +221,12 @@
 				unsigned long val);
 int eeh_dev_check_failure(struct eeh_dev *edev);
 void eeh_addr_cache_build(void);
+void eeh_add_device_early(struct device_node *);
 void eeh_add_device_tree_early(struct device_node *);
+void eeh_add_device_late(struct pci_dev *);
 void eeh_add_device_tree_late(struct pci_bus *);
 void eeh_add_sysfs_files(struct pci_bus *);
-void eeh_remove_bus_device(struct pci_dev *, int);
+void eeh_remove_device(struct pci_dev *);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -252,13 +266,17 @@
 
 static inline void eeh_addr_cache_build(void) { }
 
+static inline void eeh_add_device_early(struct device_node *dn) { }
+
 static inline void eeh_add_device_tree_early(struct device_node *dn) { }
 
+static inline void eeh_add_device_late(struct pci_dev *dev) { }
+
 static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
 
 static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
 
-static inline void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe) { }
+static inline void eeh_remove_device(struct pci_dev *dev) { }
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index ba713f1..10be1dd 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -96,10 +96,11 @@
 #endif
 
 #define hard_irq_disable()	do {			\
-	u8 _was_enabled = get_paca()->soft_enabled;	\
+	u8 _was_enabled;				\
 	__hard_irq_disable();				\
-	get_paca()->soft_enabled = 0;			\
-	get_paca()->irq_happened |= PACA_IRQ_HARD_DIS;	\
+	_was_enabled = local_paca->soft_enabled;	\
+	local_paca->soft_enabled = 0;			\
+	local_paca->irq_happened |= PACA_IRQ_HARD_DIS;	\
 	if (_was_enabled)				\
 		trace_hardirqs_off();			\
 } while(0)
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index c1df590..49fa55b 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -82,10 +82,9 @@
 void sort_ex_table(struct exception_table_entry *start,
 		   struct exception_table_entry *finish);
 
-#ifdef CONFIG_MODVERSIONS
+#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64)
 #define ARCH_RELOCATES_KCRCTAB
-
-extern const unsigned long reloc_start[];
+#define reloc_start PHYSICAL_START
 #endif
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_MODULE_H */
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 2c1d8cb..32d0d20 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -209,7 +209,6 @@
 extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
 
 /** Remove all of the PCI devices under this bus */
-extern void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe);
 extern void pcibios_remove_pci_devices(struct pci_bus *bus);
 
 /** Discover new pci devices under this bus, and add them */
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 2dd7bfc..8b24926 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <asm/hw_irq.h>
 #include <linux/device.h>
+#include <uapi/asm/perf_event.h>
 
 #define MAX_HWEVENTS		8
 #define MAX_EVENT_ALTERNATIVES	8
@@ -69,11 +70,6 @@
 #define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
 #define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
 
-/*
- * We use the event config bit 63 as a flag to request EBB.
- */
-#define EVENT_CONFIG_EBB_SHIFT	63
-
 extern int register_power_pmu(struct power_pmu *);
 
 struct pt_regs;
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 47a35b0..e378ccc 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -247,6 +247,10 @@
 	unsigned long	tm_orig_msr;	/* Thread's MSR on ctx switch */
 	struct pt_regs	ckpt_regs;	/* Checkpointed registers */
 
+	unsigned long	tm_tar;
+	unsigned long	tm_ppr;
+	unsigned long	tm_dscr;
+
 	/*
 	 * Transactional FP and VSX 0-31 register set.
 	 * NOTE: the sense of these is the opposite of the integer ckpt_regs!
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 5d7d9c2..99222e2 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -254,19 +254,28 @@
 #define SPRN_HRMOR	0x139	/* Real mode offset register */
 #define SPRN_HSRR0	0x13A	/* Hypervisor Save/Restore 0 */
 #define SPRN_HSRR1	0x13B	/* Hypervisor Save/Restore 1 */
+/* HFSCR and FSCR bit numbers are the same */
+#define FSCR_TAR_LG	8	/* Enable Target Address Register */
+#define FSCR_EBB_LG	7	/* Enable Event Based Branching */
+#define FSCR_TM_LG	5	/* Enable Transactional Memory */
+#define FSCR_PM_LG	4	/* Enable prob/priv access to PMU SPRs */
+#define FSCR_BHRB_LG	3	/* Enable Branch History Rolling Buffer*/
+#define FSCR_DSCR_LG	2	/* Enable Data Stream Control Register */
+#define FSCR_VECVSX_LG	1	/* Enable VMX/VSX  */
+#define FSCR_FP_LG	0	/* Enable Floating Point */
 #define SPRN_FSCR	0x099	/* Facility Status & Control Register */
-#define   FSCR_TAR	(1 << (63-55)) /* Enable Target Address Register */
-#define   FSCR_EBB	(1 << (63-56)) /* Enable Event Based Branching */
-#define   FSCR_DSCR	(1 << (63-61)) /* Enable Data Stream Control Register */
+#define   FSCR_TAR	__MASK(FSCR_TAR_LG)
+#define   FSCR_EBB	__MASK(FSCR_EBB_LG)
+#define   FSCR_DSCR	__MASK(FSCR_DSCR_LG)
 #define SPRN_HFSCR	0xbe	/* HV=1 Facility Status & Control Register */
-#define   HFSCR_TAR	(1 << (63-55)) /* Enable Target Address Register */
-#define   HFSCR_EBB	(1 << (63-56)) /* Enable Event Based Branching */
-#define   HFSCR_TM	(1 << (63-58)) /* Enable Transactional Memory */
-#define   HFSCR_PM	(1 << (63-60)) /* Enable prob/priv access to PMU SPRs */
-#define   HFSCR_BHRB	(1 << (63-59)) /* Enable Branch History Rolling Buffer*/
-#define   HFSCR_DSCR	(1 << (63-61)) /* Enable Data Stream Control Register */
-#define   HFSCR_VECVSX	(1 << (63-62)) /* Enable VMX/VSX  */
-#define   HFSCR_FP	(1 << (63-63)) /* Enable Floating Point */
+#define   HFSCR_TAR	__MASK(FSCR_TAR_LG)
+#define   HFSCR_EBB	__MASK(FSCR_EBB_LG)
+#define   HFSCR_TM	__MASK(FSCR_TM_LG)
+#define   HFSCR_PM	__MASK(FSCR_PM_LG)
+#define   HFSCR_BHRB	__MASK(FSCR_BHRB_LG)
+#define   HFSCR_DSCR	__MASK(FSCR_DSCR_LG)
+#define   HFSCR_VECVSX	__MASK(FSCR_VECVSX_LG)
+#define   HFSCR_FP	__MASK(FSCR_FP_LG)
 #define SPRN_TAR	0x32f	/* Target Address Register */
 #define SPRN_LPCR	0x13E	/* LPAR Control Register */
 #define   LPCR_VPM0	(1ul << (63-0))
@@ -1088,7 +1097,8 @@
 #define PVR_970MP	0x0044
 #define PVR_970GX	0x0045
 #define PVR_POWER7p	0x004A
-#define PVR_POWER8	0x004B
+#define PVR_POWER8E	0x004B
+#define PVR_POWER8	0x004D
 #define PVR_BE		0x0070
 #define PVR_PA6T	0x0090
 
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ffbaabe..48cfc85 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -145,6 +145,10 @@
 #define smp_setup_cpu_maps()
 static inline void inhibit_secondary_onlining(void) {}
 static inline void uninhibit_secondary_onlining(void) {}
+static inline const struct cpumask *cpu_sibling_mask(int cpu)
+{
+	return cpumask_of(cpu);
+}
 
 #endif /* CONFIG_SMP */
 
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 49a13e0..294c2ce 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -15,6 +15,15 @@
 struct thread_struct;
 extern struct task_struct *_switch(struct thread_struct *prev,
 				   struct thread_struct *next);
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void save_tar(struct thread_struct *prev)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		prev->tar = mfspr(SPRN_TAR);
+}
+#else
+static inline void save_tar(struct thread_struct *prev) {}
+#endif
 
 extern void giveup_fpu(struct task_struct *);
 extern void load_up_fpu(void);
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 5182c86..48be855 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -20,6 +20,7 @@
 header-y += msgbuf.h
 header-y += nvram.h
 header-y += param.h
+header-y += perf_event.h
 header-y += poll.h
 header-y += posix_types.h
 header-y += ps3fb.h
diff --git a/arch/powerpc/include/uapi/asm/perf_event.h b/arch/powerpc/include/uapi/asm/perf_event.h
new file mode 100644
index 0000000..80a4d40
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_event.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2013 Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2 of the
+ * License.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_PERF_EVENT_H
+#define _UAPI_ASM_POWERPC_PERF_EVENT_H
+
+/*
+ * We use bit 63 of perf_event_attr.config as a flag to request EBB.
+ */
+#define PERF_EVENT_CONFIG_EBB_SHIFT	63
+
+#endif /* _UAPI_ASM_POWERPC_PERF_EVENT_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c7e8afc..8207459 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -138,6 +138,9 @@
 	DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar));
 	DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr));
 	DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar));
+	DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar));
+	DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
+	DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
 	DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
 	DEFINE(THREAD_TRANSACT_VR0, offsetof(struct thread_struct,
 					 transact_vr[0]));
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 2a45d0f..22973a7 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -494,9 +494,27 @@
 		.cpu_restore		= __restore_cpu_power7,
 		.platform		= "power7+",
 	},
-	{	/* Power8 */
+	{	/* Power8E */
 		.pvr_mask		= 0xffff0000,
 		.pvr_value		= 0x004b0000,
+		.cpu_name		= "POWER8E (raw)",
+		.cpu_features		= CPU_FTRS_POWER8,
+		.cpu_user_features	= COMMON_USER_POWER8,
+		.cpu_user_features2	= COMMON_USER2_POWER8,
+		.mmu_features		= MMU_FTRS_POWER8,
+		.icache_bsize		= 128,
+		.dcache_bsize		= 128,
+		.num_pmcs		= 6,
+		.pmc_type		= PPC_PMC_IBM,
+		.oprofile_cpu_type	= "ppc64/power8",
+		.oprofile_type		= PPC_OPROFILE_INVALID,
+		.cpu_setup		= __setup_cpu_power8,
+		.cpu_restore		= __restore_cpu_power8,
+		.platform		= "power8",
+	},
+	{	/* Power8 */
+		.pvr_mask		= 0xffff0000,
+		.pvr_value		= 0x004d0000,
 		.cpu_name		= "POWER8 (raw)",
 		.cpu_features		= CPU_FTRS_POWER8,
 		.cpu_user_features	= COMMON_USER_POWER8,
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 39954fe..55593ee 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -231,7 +231,7 @@
 void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
 {
 	size_t loglen = 0;
-	struct eeh_dev *edev;
+	struct eeh_dev *edev, *tmp;
 	bool valid_cfg_log = true;
 
 	/*
@@ -251,7 +251,7 @@
 		eeh_pe_restore_bars(pe);
 
 		pci_regs_buf[0] = 0;
-		eeh_pe_for_each_dev(pe, edev) {
+		eeh_pe_for_each_dev(pe, edev, tmp) {
 			loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
 						      EEH_PCI_REGS_LOG_LEN - loglen);
 		}
@@ -499,8 +499,6 @@
 	}
 
 	eeh_dev_check_failure(edev);
-
-	pci_dev_put(eeh_dev_to_pci_dev(edev));
 	return val;
 }
 
@@ -838,7 +836,7 @@
  * on the CEC architecture, type of the device, on earlier boot
  * command-line arguments & etc.
  */
-static void eeh_add_device_early(struct device_node *dn)
+void eeh_add_device_early(struct device_node *dn)
 {
 	struct pci_controller *phb;
 
@@ -886,7 +884,7 @@
  * This routine must be used to complete EEH initialization for PCI
  * devices that were added after system boot (e.g. hotplug, dlpar).
  */
-static void eeh_add_device_late(struct pci_dev *dev)
+void eeh_add_device_late(struct pci_dev *dev)
 {
 	struct device_node *dn;
 	struct eeh_dev *edev;
@@ -902,9 +900,23 @@
 		pr_debug("EEH: Already referenced !\n");
 		return;
 	}
-	WARN_ON(edev->pdev);
 
-	pci_dev_get(dev);
+	/*
+	 * The EEH cache might not be removed correctly because of
+	 * unbalanced kref to the device during unplug time, which
+	 * relies on pcibios_release_device(). So we have to remove
+	 * that here explicitly.
+	 */
+	if (edev->pdev) {
+		eeh_rmv_from_parent_pe(edev);
+		eeh_addr_cache_rmv_dev(edev->pdev);
+		eeh_sysfs_remove_device(edev->pdev);
+		edev->mode &= ~EEH_DEV_SYSFS;
+
+		edev->pdev = NULL;
+		dev->dev.archdata.edev = NULL;
+	}
+
 	edev->pdev = dev;
 	dev->dev.archdata.edev = edev;
 
@@ -967,7 +979,6 @@
 /**
  * eeh_remove_device - Undo EEH setup for the indicated pci device
  * @dev: pci device to be removed
- * @purge_pe: remove the PE or not
  *
  * This routine should be called when a device is removed from
  * a running system (e.g. by hotplug or dlpar).  It unregisters
@@ -975,7 +986,7 @@
  * this device will no longer be detected after this call; thus,
  * i/o errors affecting this slot may leave this device unusable.
  */
-static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
+void eeh_remove_device(struct pci_dev *dev)
 {
 	struct eeh_dev *edev;
 
@@ -986,42 +997,29 @@
 	/* Unregister the device with the EEH/PCI address search system */
 	pr_debug("EEH: Removing device %s\n", pci_name(dev));
 
-	if (!edev || !edev->pdev) {
+	if (!edev || !edev->pdev || !edev->pe) {
 		pr_debug("EEH: Not referenced !\n");
 		return;
 	}
+
+	/*
+	 * During the hotplug for EEH error recovery, we need the EEH
+	 * device attached to the parent PE in order for BAR restore
+	 * a bit later. So we keep it for BAR restore and remove it
+	 * from the parent PE during the BAR resotre.
+	 */
 	edev->pdev = NULL;
 	dev->dev.archdata.edev = NULL;
-	pci_dev_put(dev);
+	if (!(edev->pe->state & EEH_PE_KEEP))
+		eeh_rmv_from_parent_pe(edev);
+	else
+		edev->mode |= EEH_DEV_DISCONNECTED;
 
-	eeh_rmv_from_parent_pe(edev, purge_pe);
 	eeh_addr_cache_rmv_dev(dev);
 	eeh_sysfs_remove_device(dev);
+	edev->mode &= ~EEH_DEV_SYSFS;
 }
 
-/**
- * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
- * @dev: PCI device
- * @purge_pe: remove the corresponding PE or not
- *
- * This routine must be called when a device is removed from the
- * running system through hotplug or dlpar. The corresponding
- * PCI address cache will be removed.
- */
-void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
-{
-	struct pci_bus *bus = dev->subordinate;
-	struct pci_dev *child, *tmp;
-
-	eeh_remove_device(dev, purge_pe);
-
-	if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
-		list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
-			 eeh_remove_bus_device(child, purge_pe);
-	}
-}
-EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
-
 static int proc_eeh_show(struct seq_file *m, void *v)
 {
 	if (0 == eeh_subsystem_enabled) {
@@ -1063,7 +1061,7 @@
 
 static int __init eeh_init_proc(void)
 {
-	if (machine_is(pseries))
+	if (machine_is(pseries) || machine_is(powernv))
 		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
 	return 0;
 }
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index f9ac123..e8c9fd5 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -68,16 +68,12 @@
 		struct pci_io_addr_range *piar;
 		piar = rb_entry(n, struct pci_io_addr_range, rb_node);
 
-		if (addr < piar->addr_lo) {
+		if (addr < piar->addr_lo)
 			n = n->rb_left;
-		} else {
-			if (addr > piar->addr_hi) {
-				n = n->rb_right;
-			} else {
-				pci_dev_get(piar->pcidev);
-				return piar->edev;
-			}
-		}
+		else if (addr > piar->addr_hi)
+			n = n->rb_right;
+		else
+			return piar->edev;
 	}
 
 	return NULL;
@@ -156,7 +152,6 @@
 	if (!piar)
 		return NULL;
 
-	pci_dev_get(dev);
 	piar->addr_lo = alo;
 	piar->addr_hi = ahi;
 	piar->edev = pci_dev_to_eeh_dev(dev);
@@ -250,7 +245,6 @@
 
 		if (piar->pcidev == dev) {
 			rb_erase(n, &pci_io_addr_cache_root.rb_root);
-			pci_dev_put(piar->pcidev);
 			kfree(piar);
 			goto restart;
 		}
@@ -302,12 +296,10 @@
 		if (!edev)
 			continue;
 
-		pci_dev_get(dev);  /* matching put is in eeh_remove_device() */
 		dev->dev.archdata.edev = edev;
 		edev->pdev = dev;
 
 		eeh_addr_cache_insert_dev(dev);
-
 		eeh_sysfs_add_device(dev);
 	}
 
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 2b1ce17..36bed5a 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -143,10 +143,14 @@
 static void eeh_enable_irq(struct pci_dev *dev)
 {
 	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
+	struct irq_desc *desc;
 
 	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
 		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
-		enable_irq(dev->irq);
+
+		desc = irq_to_desc(dev->irq);
+		if (desc && desc->depth > 0)
+			enable_irq(dev->irq);
 	}
 }
 
@@ -338,6 +342,54 @@
 	return NULL;
 }
 
+static void *eeh_rmv_device(void *data, void *userdata)
+{
+	struct pci_driver *driver;
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	int *removed = (int *)userdata;
+
+	/*
+	 * Actually, we should remove the PCI bridges as well.
+	 * However, that's lots of complexity to do that,
+	 * particularly some of devices under the bridge might
+	 * support EEH. So we just care about PCI devices for
+	 * simplicity here.
+	 */
+	if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
+		return NULL;
+	driver = eeh_pcid_get(dev);
+	if (driver && driver->err_handler)
+		return NULL;
+
+	/* Remove it from PCI subsystem */
+	pr_debug("EEH: Removing %s without EEH sensitive driver\n",
+		 pci_name(dev));
+	edev->bus = dev->bus;
+	edev->mode |= EEH_DEV_DISCONNECTED;
+	(*removed)++;
+
+	pci_stop_and_remove_bus_device(dev);
+
+	return NULL;
+}
+
+static void *eeh_pe_detach_dev(void *data, void *userdata)
+{
+	struct eeh_pe *pe = (struct eeh_pe *)data;
+	struct eeh_dev *edev, *tmp;
+
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		if (!(edev->mode & EEH_DEV_DISCONNECTED))
+			continue;
+
+		edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
+		eeh_rmv_from_parent_pe(edev);
+	}
+
+	return NULL;
+}
+
 /**
  * eeh_reset_device - Perform actual reset of a pci slot
  * @pe: EEH PE
@@ -349,8 +401,9 @@
  */
 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 {
+	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
 	struct timeval tstamp;
-	int cnt, rc;
+	int cnt, rc, removed = 0;
 
 	/* pcibios will clear the counter; save the value */
 	cnt = pe->freeze_count;
@@ -362,8 +415,11 @@
 	 * devices are expected to be attached soon when calling
 	 * into pcibios_add_pci_devices().
 	 */
+	eeh_pe_state_mark(pe, EEH_PE_KEEP);
 	if (bus)
-		__pcibios_remove_pci_devices(bus, 0);
+		pcibios_remove_pci_devices(bus);
+	else if (frozen_bus)
+		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
 
 	/* Reset the pci controller. (Asserts RST#; resets config space).
 	 * Reconfigure bridges and devices. Don't try to bring the system
@@ -384,9 +440,24 @@
 	 * potentially weird things happen.
 	 */
 	if (bus) {
+		pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
 		ssleep(5);
+
+		/*
+		 * The EEH device is still connected with its parent
+		 * PE. We should disconnect it so the binding can be
+		 * rebuilt when adding PCI devices.
+		 */
+		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
 		pcibios_add_pci_devices(bus);
+	} else if (frozen_bus && removed) {
+		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
+		ssleep(5);
+
+		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
+		pcibios_add_pci_devices(frozen_bus);
 	}
+	eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
 	pe->tstamp = tstamp;
 	pe->freeze_count = cnt;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 016588a..f945053 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -149,8 +149,8 @@
  * callback returns something other than NULL, or no more PEs
  * to be traversed.
  */
-static void *eeh_pe_traverse(struct eeh_pe *root,
-			eeh_traverse_func fn, void *flag)
+void *eeh_pe_traverse(struct eeh_pe *root,
+		      eeh_traverse_func fn, void *flag)
 {
 	struct eeh_pe *pe;
 	void *ret;
@@ -176,7 +176,7 @@
 		eeh_traverse_func fn, void *flag)
 {
 	struct eeh_pe *pe;
-	struct eeh_dev *edev;
+	struct eeh_dev *edev, *tmp;
 	void *ret;
 
 	if (!root) {
@@ -186,7 +186,7 @@
 
 	/* Traverse root PE */
 	for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
-		eeh_pe_for_each_dev(pe, edev) {
+		eeh_pe_for_each_dev(pe, edev, tmp) {
 			ret = fn(edev, flag);
 			if (ret)
 				return ret;
@@ -333,7 +333,7 @@
 		while (parent) {
 			if (!(parent->type & EEH_PE_INVALID))
 				break;
-			parent->type &= ~EEH_PE_INVALID;
+			parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
 			parent = parent->parent;
 		}
 		pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
@@ -397,21 +397,20 @@
 /**
  * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
  * @edev: EEH device
- * @purge_pe: remove PE or not
  *
  * The PE hierarchy tree might be changed when doing PCI hotplug.
  * Also, the PCI devices or buses could be removed from the system
  * during EEH recovery. So we have to call the function remove the
  * corresponding PE accordingly if necessary.
  */
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
+int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
 {
 	struct eeh_pe *pe, *parent, *child;
 	int cnt;
 
 	if (!edev->pe) {
-		pr_warning("%s: No PE found for EEH device %s\n",
-			__func__, edev->dn->full_name);
+		pr_debug("%s: No PE found for EEH device %s\n",
+			 __func__, edev->dn->full_name);
 		return -EEXIST;
 	}
 
@@ -431,7 +430,7 @@
 		if (pe->type & EEH_PE_PHB)
 			break;
 
-		if (purge_pe) {
+		if (!(pe->state & EEH_PE_KEEP)) {
 			if (list_empty(&pe->edevs) &&
 			    list_empty(&pe->child_list)) {
 				list_del(&pe->child);
@@ -502,7 +501,7 @@
 {
 	struct eeh_pe *pe = (struct eeh_pe *)data;
 	int state = *((int *)flag);
-	struct eeh_dev *tmp;
+	struct eeh_dev *edev, *tmp;
 	struct pci_dev *pdev;
 
 	/*
@@ -512,8 +511,8 @@
 	 * the PCI device driver.
 	 */
 	pe->state |= state;
-	eeh_pe_for_each_dev(pe, tmp) {
-		pdev = eeh_dev_to_pci_dev(tmp);
+	eeh_pe_for_each_dev(pe, edev, tmp) {
+		pdev = eeh_dev_to_pci_dev(edev);
 		if (pdev)
 			pdev->error_state = pci_channel_io_frozen;
 	}
@@ -579,7 +578,7 @@
  * blocked on normal path during the stage. So we need utilize
  * eeh operations, which is always permitted.
  */
-static void eeh_bridge_check_link(struct pci_dev *pdev,
+static void eeh_bridge_check_link(struct eeh_dev *edev,
 				  struct device_node *dn)
 {
 	int cap;
@@ -590,16 +589,17 @@
 	 * We only check root port and downstream ports of
 	 * PCIe switches
 	 */
-	if (!pci_is_pcie(pdev) ||
-	    (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT &&
-	     pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM))
+	if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
 		return;
 
-	pr_debug("%s: Check PCIe link for %s ...\n",
-		 __func__, pci_name(pdev));
+	pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
+		 __func__, edev->phb->global_number,
+		 edev->config_addr >> 8,
+		 PCI_SLOT(edev->config_addr & 0xFF),
+		 PCI_FUNC(edev->config_addr & 0xFF));
 
 	/* Check slot status */
-	cap = pdev->pcie_cap;
+	cap = edev->pcie_cap;
 	eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
 	if (!(val & PCI_EXP_SLTSTA_PDS)) {
 		pr_debug("  No card in the slot (0x%04x) !\n", val);
@@ -653,8 +653,7 @@
 #define BYTE_SWAP(OFF)	(8*((OFF)/4)+3-(OFF))
 #define SAVED_BYTE(OFF)	(((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
 
-static void eeh_restore_bridge_bars(struct pci_dev *pdev,
-				    struct eeh_dev *edev,
+static void eeh_restore_bridge_bars(struct eeh_dev *edev,
 				    struct device_node *dn)
 {
 	int i;
@@ -680,7 +679,7 @@
 	eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
 
 	/* Check the PCIe link is ready */
-	eeh_bridge_check_link(pdev, dn);
+	eeh_bridge_check_link(edev, dn);
 }
 
 static void eeh_restore_device_bars(struct eeh_dev *edev,
@@ -729,19 +728,12 @@
  */
 static void *eeh_restore_one_device_bars(void *data, void *flag)
 {
-	struct pci_dev *pdev = NULL;
 	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct device_node *dn = eeh_dev_to_of_node(edev);
 
-	/* Trace the PCI bridge */
-	if (eeh_probe_mode_dev()) {
-		pdev = eeh_dev_to_pci_dev(edev);
-		if (pdev->hdr_type != PCI_HEADER_TYPE_BRIDGE)
-                        pdev = NULL;
-        }
-
-	if (pdev)
-		eeh_restore_bridge_bars(pdev, edev, dn);
+	/* Do special restore for bridges */
+	if (edev->mode & EEH_DEV_BRIDGE)
+		eeh_restore_bridge_bars(edev, dn);
 	else
 		eeh_restore_device_bars(edev, dn);
 
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index e7ae348..5d753d4 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -56,19 +56,40 @@
 
 void eeh_sysfs_add_device(struct pci_dev *pdev)
 {
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 	int rc=0;
 
+	if (edev && (edev->mode & EEH_DEV_SYSFS))
+		return;
+
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
 
 	if (rc)
 		printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
+	else if (edev)
+		edev->mode |= EEH_DEV_SYSFS;
 }
 
 void eeh_sysfs_remove_device(struct pci_dev *pdev)
 {
+	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+	/*
+	 * The parent directory might have been removed. We needn't
+	 * continue for that case.
+	 */
+	if (!pdev->dev.kobj.sd) {
+		if (edev)
+			edev->mode &= ~EEH_DEV_SYSFS;
+		return;
+	}
+
 	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
 	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+
+	if (edev)
+		edev->mode &= ~EEH_DEV_SYSFS;
 }
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ab15b8d..2bd0b88 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -449,15 +449,6 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 BEGIN_FTR_SECTION
-	/*
-	 * Back up the TAR across context switches.  Note that the TAR is not
-	 * available for use in the kernel.  (To provide this, the TAR should
-	 * be backed up/restored on exception entry/exit instead, and be in
-	 * pt_regs.  FIXME, this should be in pt_regs anyway (for debug).)
-	 */
-	mfspr	r0,SPRN_TAR
-	std	r0,THREAD_TAR(r3)
-
 	/* Event based branch registers */
 	mfspr	r0, SPRN_BESCR
 	std	r0, THREAD_BESCR(r3)
@@ -584,9 +575,34 @@
 	ld	r7,DSCR_DEFAULT@toc(2)
 	ld	r0,THREAD_DSCR(r4)
 	cmpwi	r6,0
+	li	r8, FSCR_DSCR
 	bne	1f
 	ld	r0,0(r7)
-1:	cmpd	r0,r25
+	b	3f
+1:
+  BEGIN_FTR_SECTION_NESTED(70)
+	mfspr	r6, SPRN_FSCR
+	or	r6, r6, r8
+	mtspr	SPRN_FSCR, r6
+    BEGIN_FTR_SECTION_NESTED(69)
+	mfspr	r6, SPRN_HFSCR
+	or	r6, r6, r8
+	mtspr	SPRN_HFSCR, r6
+    END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69)
+	b	4f
+  END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
+3:
+  BEGIN_FTR_SECTION_NESTED(70)
+	mfspr	r6, SPRN_FSCR
+	andc	r6, r6, r8
+	mtspr	SPRN_FSCR, r6
+    BEGIN_FTR_SECTION_NESTED(69)
+	mfspr	r6, SPRN_HFSCR
+	andc	r6, r6, r8
+	mtspr	SPRN_HFSCR, r6
+    END_FTR_SECTION_NESTED(CPU_FTR_HVMODE, CPU_FTR_HVMODE, 69)
+  END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
+4:	cmpd	r0,r25
 	beq	2f
 	mtspr	SPRN_DSCR,r0
 2:
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4e00d22..902ca3c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -848,7 +848,7 @@
 	. = 0x4f80
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXGEN)
-	b	facility_unavailable_relon_hv
+	b	hv_facility_unavailable_relon_hv
 
 	STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
 #ifdef CONFIG_PPC_DENORMALISATION
@@ -1175,6 +1175,7 @@
 	b	.ret_from_except
 
 	STD_EXCEPTION_COMMON(0xf60, facility_unavailable, .facility_unavailable_exception)
+	STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, .facility_unavailable_exception)
 
 	.align	7
 	.globl	__end_handlers
@@ -1188,7 +1189,7 @@
 	STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
 	STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
 	STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
-	STD_RELON_EXCEPTION_HV_OOL(0xf80, facility_unavailable)
+	STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
 
 #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
 /*
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 2e51cde..c69440c 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -362,7 +362,7 @@
 		seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs);
 	seq_printf(p, "  Spurious interrupts\n");
 
-	seq_printf(p, "%*s: ", prec, "CNT");
+	seq_printf(p, "%*s: ", prec, "PMI");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs);
 	seq_printf(p, "  Performance monitoring interrupts\n");
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f46914a..7d22a67 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1462,6 +1462,8 @@
 	/* Allocate bus and devices resources */
 	pcibios_allocate_bus_resources(bus);
 	pcibios_claim_one_bus(bus);
+	if (!pci_has_flag(PCI_PROBE_ONLY))
+		pci_assign_unassigned_bus_resources(bus);
 
 	/* Fixup EEH */
 	eeh_add_device_tree_late(bus);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 3f60880..c1e17ae 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -22,32 +22,14 @@
 #include <asm/eeh.h>
 
 /**
- * __pcibios_remove_pci_devices - remove all devices under this bus
- * @bus: the indicated PCI bus
- * @purge_pe: destroy the PE on removal of PCI devices
+ * pcibios_release_device - release PCI device
+ * @dev: PCI device
  *
- * Remove all of the PCI devices under this bus both from the
- * linux pci device tree, and from the powerpc EEH address cache.
- * By default, the corresponding PE will be destroied during the
- * normal PCI hotplug path. For PCI hotplug during EEH recovery,
- * the corresponding PE won't be destroied and deallocated.
+ * The function is called before releasing the indicated PCI device.
  */
-void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
+void pcibios_release_device(struct pci_dev *dev)
 {
-	struct pci_dev *dev, *tmp;
-	struct pci_bus *child_bus;
-
-	/* First go down child busses */
-	list_for_each_entry(child_bus, &bus->children, node)
-		__pcibios_remove_pci_devices(child_bus, purge_pe);
-
-	pr_debug("PCI: Removing devices on bus %04x:%02x\n",
-		 pci_domain_nr(bus),  bus->number);
-	list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
-		pr_debug("     * Removing %s...\n", pci_name(dev));
-		eeh_remove_bus_device(dev, purge_pe);
-		pci_stop_and_remove_bus_device(dev);
-	}
+	eeh_remove_device(dev);
 }
 
 /**
@@ -59,8 +41,21 @@
  */
 void pcibios_remove_pci_devices(struct pci_bus *bus)
 {
-	__pcibios_remove_pci_devices(bus, 1);
+	struct pci_dev *dev, *tmp;
+	struct pci_bus *child_bus;
+
+	/* First go down child busses */
+	list_for_each_entry(child_bus, &bus->children, node)
+		pcibios_remove_pci_devices(child_bus);
+
+	pr_debug("PCI: Removing devices on bus %04x:%02x\n",
+		 pci_domain_nr(bus),  bus->number);
+	list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
+		pr_debug("   Removing %s...\n", pci_name(dev));
+		pci_stop_and_remove_bus_device(dev);
+	}
 }
+
 EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
 
 /**
@@ -76,7 +71,7 @@
  */
 void pcibios_add_pci_devices(struct pci_bus * bus)
 {
-	int slotno, num, mode, pass, max;
+	int slotno, mode, pass, max;
 	struct pci_dev *dev;
 	struct device_node *dn = pci_bus_to_OF_node(bus);
 
@@ -90,11 +85,15 @@
 		/* use ofdt-based probe */
 		of_rescan_bus(dn, bus);
 	} else if (mode == PCI_PROBE_NORMAL) {
-		/* use legacy probe */
+		/*
+		 * Use legacy probe. In the partial hotplug case, we
+		 * probably have grandchildren devices unplugged. So
+		 * we don't check the return value from pci_scan_slot() in
+		 * order for fully rescan all the way down to pick them up.
+		 * They can have been removed during partial hotplug.
+		 */
 		slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
-		num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
-		if (!num)
-			return;
+		pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
 		pcibios_setup_bus_devices(bus);
 		max = bus->busn_res.start;
 		for (pass = 0; pass < 2; pass++) {
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 6b0ba58..15d9105 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -230,11 +230,14 @@
 		return;
 	}
 
-	bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+	bus = pci_find_bus(pci_domain_nr(dev->bus), busrange[0]);
 	if (!bus) {
-		printk(KERN_ERR "Failed to create pci bus for %s\n",
-		       node->full_name);
-		return;
+		bus = pci_add_new_bus(dev->bus, dev, busrange[0]);
+		if (!bus) {
+			printk(KERN_ERR "Failed to create pci bus for %s\n",
+			       node->full_name);
+			return;
+		}
 	}
 
 	bus->primary = dev->bus->number;
@@ -292,6 +295,38 @@
 }
 EXPORT_SYMBOL(of_scan_pci_bridge);
 
+static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
+			    struct device_node *dn)
+{
+	struct pci_dev *dev = NULL;
+	const u32 *reg;
+	int reglen, devfn;
+
+	pr_debug("  * %s\n", dn->full_name);
+	if (!of_device_is_available(dn))
+		return NULL;
+
+	reg = of_get_property(dn, "reg", &reglen);
+	if (reg == NULL || reglen < 20)
+		return NULL;
+	devfn = (reg[0] >> 8) & 0xff;
+
+	/* Check if the PCI device is already there */
+	dev = pci_get_slot(bus, devfn);
+	if (dev) {
+		pci_dev_put(dev);
+		return dev;
+	}
+
+	/* create a new pci_dev for this device */
+	dev = of_create_pci_dev(dn, bus, devfn);
+	if (!dev)
+		return NULL;
+
+	pr_debug("  dev header type: %x\n", dev->hdr_type);
+	return dev;
+}
+
 /**
  * __of_scan_bus - given a PCI bus node, setup bus and scan for child devices
  * @node: device tree node for the PCI bus
@@ -302,8 +337,6 @@
 			  int rescan_existing)
 {
 	struct device_node *child;
-	const u32 *reg;
-	int reglen, devfn;
 	struct pci_dev *dev;
 
 	pr_debug("of_scan_bus(%s) bus no %d...\n",
@@ -311,16 +344,7 @@
 
 	/* Scan direct children */
 	for_each_child_of_node(node, child) {
-		pr_debug("  * %s\n", child->full_name);
-		if (!of_device_is_available(child))
-			continue;
-		reg = of_get_property(child, "reg", &reglen);
-		if (reg == NULL || reglen < 20)
-			continue;
-		devfn = (reg[0] >> 8) & 0xff;
-
-		/* create a new pci_dev for this device */
-		dev = of_create_pci_dev(child, bus, devfn);
+		dev = of_scan_pci_dev(bus, child);
 		if (!dev)
 			continue;
 		pr_debug("    dev header type: %x\n", dev->hdr_type);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index c517dbe..8083be2 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -600,6 +600,16 @@
 	struct ppc64_tlb_batch *batch;
 #endif
 
+	/* Back up the TAR across context switches.
+	 * Note that the TAR is not available for use in the kernel.  (To
+	 * provide this, the TAR should be backed up/restored on exception
+	 * entry/exit instead, and be in pt_regs.  FIXME, this should be in
+	 * pt_regs anyway (for debug).)
+	 * Save the TAR here before we do treclaim/trecheckpoint as these
+	 * will change the TAR.
+	 */
+	save_tar(&prev->thread);
+
 	__switch_to_tm(prev);
 
 #ifdef CONFIG_SMP
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 5eccda9..6079024 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -644,7 +644,8 @@
 	W(0xfffe0000), W(0x003a0000),	/* POWER5/POWER5+ */
 	W(0xffff0000), W(0x003e0000),	/* POWER6 */
 	W(0xffff0000), W(0x003f0000),	/* POWER7 */
-	W(0xffff0000), W(0x004b0000),	/* POWER8 */
+	W(0xffff0000), W(0x004b0000),	/* POWER8E */
+	W(0xffff0000), W(0x004d0000),	/* POWER8 */
 	W(0xffffffff), W(0x0f000004),	/* all 2.07-compliant */
 	W(0xffffffff), W(0x0f000003),	/* all 2.06-compliant */
 	W(0xffffffff), W(0x0f000002),	/* all 2.05-compliant */
@@ -706,7 +707,7 @@
 	 * must match by the macro below. Update the definition if
 	 * the structure layout changes.
 	 */
-#define IBM_ARCH_VEC_NRCORES_OFFSET	117
+#define IBM_ARCH_VEC_NRCORES_OFFSET	125
 	W(NR_CPUS),			/* number of cores supported */
 	0,
 	0,
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 51be8fb..0554d1f 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -233,6 +233,16 @@
 	std	r5, _CCR(r7)
 	std	r6, _XER(r7)
 
+
+	/* ******************** TAR, PPR, DSCR ********** */
+	mfspr	r3, SPRN_TAR
+	mfspr	r4, SPRN_PPR
+	mfspr	r5, SPRN_DSCR
+
+	std	r3, THREAD_TM_TAR(r12)
+	std	r4, THREAD_TM_PPR(r12)
+	std	r5, THREAD_TM_DSCR(r12)
+
 	/* MSR and flags:  We don't change CRs, and we don't need to alter
 	 * MSR.
 	 */
@@ -347,6 +357,16 @@
 	mtmsr	r6				/* FP/Vec off again! */
 
 restore_gprs:
+
+	/* ******************** TAR, PPR, DSCR ********** */
+	ld	r4, THREAD_TM_TAR(r3)
+	ld	r5, THREAD_TM_PPR(r3)
+	ld	r6, THREAD_TM_DSCR(r3)
+
+	mtspr	SPRN_TAR,	r4
+	mtspr	SPRN_PPR,	r5
+	mtspr	SPRN_DSCR,	r6
+
 	/* ******************** CR,LR,CCR,MSR ********** */
 	ld	r3, _CTR(r7)
 	ld	r4, _LINK(r7)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bf33c22..e435bc0 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -44,9 +44,7 @@
 #include <asm/machdep.h>
 #include <asm/rtas.h>
 #include <asm/pmc.h>
-#ifdef CONFIG_PPC32
 #include <asm/reg.h>
-#endif
 #ifdef CONFIG_PMAC_BACKLIGHT
 #include <asm/backlight.h>
 #endif
@@ -1296,43 +1294,54 @@
 	die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
 }
 
+#ifdef CONFIG_PPC64
 void facility_unavailable_exception(struct pt_regs *regs)
 {
 	static char *facility_strings[] = {
-		"FPU",
-		"VMX/VSX",
-		"DSCR",
-		"PMU SPRs",
-		"BHRB",
-		"TM",
-		"AT",
-		"EBB",
-		"TAR",
+		[FSCR_FP_LG] = "FPU",
+		[FSCR_VECVSX_LG] = "VMX/VSX",
+		[FSCR_DSCR_LG] = "DSCR",
+		[FSCR_PM_LG] = "PMU SPRs",
+		[FSCR_BHRB_LG] = "BHRB",
+		[FSCR_TM_LG] = "TM",
+		[FSCR_EBB_LG] = "EBB",
+		[FSCR_TAR_LG] = "TAR",
 	};
-	char *facility, *prefix;
+	char *facility = "unknown";
 	u64 value;
+	u8 status;
+	bool hv;
 
-	if (regs->trap == 0xf60) {
-		value = mfspr(SPRN_FSCR);
-		prefix = "";
-	} else {
+	hv = (regs->trap == 0xf80);
+	if (hv)
 		value = mfspr(SPRN_HFSCR);
-		prefix = "Hypervisor ";
+	else
+		value = mfspr(SPRN_FSCR);
+
+	status = value >> 56;
+	if (status == FSCR_DSCR_LG) {
+		/* User is acessing the DSCR.  Set the inherit bit and allow
+		 * the user to set it directly in future by setting via the
+		 * H/FSCR DSCR bit.
+		 */
+		current->thread.dscr_inherit = 1;
+		if (hv)
+			mtspr(SPRN_HFSCR, value | HFSCR_DSCR);
+		else
+			mtspr(SPRN_FSCR,  value | FSCR_DSCR);
+		return;
 	}
 
-	value = value >> 56;
+	if ((status < ARRAY_SIZE(facility_strings)) &&
+	    facility_strings[status])
+		facility = facility_strings[status];
 
 	/* We restore the interrupt state now */
 	if (!arch_irq_disabled_regs(regs))
 		local_irq_enable();
 
-	if (value < ARRAY_SIZE(facility_strings))
-		facility = facility_strings[value];
-	else
-		facility = "unknown";
-
 	pr_err("%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
-		prefix, facility, regs->nip, regs->msr);
+	       hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
 
 	if (user_mode(regs)) {
 		_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
@@ -1341,6 +1350,7 @@
 
 	die("Unexpected facility unavailable exception", regs, SIGABRT);
 }
+#endif
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 654e479..f096e72 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -38,9 +38,6 @@
 #endif
 SECTIONS
 {
-	. = 0;
-	reloc_start = .;
-
 	. = KERNELBASE;
 
 /*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2efa9dd..7629cd3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1809,7 +1809,7 @@
 		rma_size <<= PAGE_SHIFT;
 		rmls = lpcr_rmls(rma_size);
 		err = -EINVAL;
-		if (rmls < 0) {
+		if ((long)rmls < 0) {
 			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
 			goto out_srcu;
 		}
@@ -1874,7 +1874,7 @@
 	/* Allocate the guest's logical partition ID */
 
 	lpid = kvmppc_alloc_lpid();
-	if (lpid < 0)
+	if ((long)lpid < 0)
 		return -ENOMEM;
 	kvm->arch.lpid = lpid;
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 19498a5..c6e13d9 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1047,11 +1047,12 @@
 	if (err)
 		goto free_shadow_vcpu;
 
+	err = -ENOMEM;
 	p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
-	/* the real shared page fills the last 4k of our page */
-	vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096);
 	if (!p)
 		goto uninit_vcpu;
+	/* the real shared page fills the last 4k of our page */
+	vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096);
 
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* default to book3s_64 (970fx) */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 3f0c30a..c33d939 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -43,6 +43,7 @@
 {
 	unsigned long va;
 	unsigned int penc;
+	unsigned long sllp;
 
 	/*
 	 * We need 14 to 65 bits of va for a tlibe of 4K page
@@ -64,7 +65,9 @@
 		/* clear out bits after (52) [0....52.....63] */
 		va &= ~((1ul << (64 - 52)) - 1);
 		va |= ssize << 8;
-		va |= mmu_psize_defs[apsize].sllp << 6;
+		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
+			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+		va |= sllp << 5;
 		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
 			     : : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
 			     : "memory");
@@ -98,6 +101,7 @@
 {
 	unsigned long va;
 	unsigned int penc;
+	unsigned long sllp;
 
 	/* VPN_SHIFT can be atmost 12 */
 	va = vpn << VPN_SHIFT;
@@ -113,7 +117,9 @@
 		/* clear out bits after(52) [0....52.....63] */
 		va &= ~((1ul << (64 - 52)) - 1);
 		va |= ssize << 8;
-		va |= mmu_psize_defs[apsize].sllp << 6;
+		sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
+			((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+		va |= sllp << 5;
 		asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
 			     : : "r"(va) : "memory");
 		break;
@@ -554,6 +560,7 @@
 			seg_off |= vpi << shift;
 		}
 		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		break;
 	case MMU_SEGSIZE_1T:
 		/* We only have 40 - 23 bits of seg_off in avpn */
 		seg_off = (avpn & 0x1ffff) << 23;
@@ -563,6 +570,7 @@
 			seg_off |= vpi << shift;
 		}
 		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
+		break;
 	default:
 		*vpn = size = 0;
 	}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 0839721..5850798 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -27,6 +27,7 @@
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
+#include <asm/cputhreads.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/smp.h>
@@ -1318,7 +1319,8 @@
 			}
 		}
 		if (changed) {
-			cpumask_set_cpu(cpu, changes);
+			cpumask_or(changes, changes, cpu_sibling_mask(cpu));
+			cpu = cpu_last_thread_sibling(cpu);
 		}
 	}
 
@@ -1426,7 +1428,7 @@
 	if (!data)
 		return -EINVAL;
 
-	cpu = get_cpu();
+	cpu = smp_processor_id();
 
 	for (update = data; update; update = update->next) {
 		if (cpu != update->cpu)
@@ -1446,12 +1448,12 @@
  */
 int arch_update_cpu_topology(void)
 {
-	unsigned int cpu, changed = 0;
+	unsigned int cpu, sibling, changed = 0;
 	struct topology_update_data *updates, *ud;
 	unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
 	cpumask_t updated_cpus;
 	struct device *dev;
-	int weight, i = 0;
+	int weight, new_nid, i = 0;
 
 	weight = cpumask_weight(&cpu_associativity_changes_mask);
 	if (!weight)
@@ -1464,19 +1466,46 @@
 	cpumask_clear(&updated_cpus);
 
 	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
-		ud = &updates[i++];
-		ud->cpu = cpu;
+		/*
+		 * If siblings aren't flagged for changes, updates list
+		 * will be too short. Skip on this update and set for next
+		 * update.
+		 */
+		if (!cpumask_subset(cpu_sibling_mask(cpu),
+					&cpu_associativity_changes_mask)) {
+			pr_info("Sibling bits not set for associativity "
+					"change, cpu%d\n", cpu);
+			cpumask_or(&cpu_associativity_changes_mask,
+					&cpu_associativity_changes_mask,
+					cpu_sibling_mask(cpu));
+			cpu = cpu_last_thread_sibling(cpu);
+			continue;
+		}
+
+		/* Use associativity from first thread for all siblings */
 		vphn_get_associativity(cpu, associativity);
-		ud->new_nid = associativity_to_nid(associativity);
+		new_nid = associativity_to_nid(associativity);
+		if (new_nid < 0 || !node_online(new_nid))
+			new_nid = first_online_node;
 
-		if (ud->new_nid < 0 || !node_online(ud->new_nid))
-			ud->new_nid = first_online_node;
+		if (new_nid == numa_cpu_lookup_table[cpu]) {
+			cpumask_andnot(&cpu_associativity_changes_mask,
+					&cpu_associativity_changes_mask,
+					cpu_sibling_mask(cpu));
+			cpu = cpu_last_thread_sibling(cpu);
+			continue;
+		}
 
-		ud->old_nid = numa_cpu_lookup_table[cpu];
-		cpumask_set_cpu(cpu, &updated_cpus);
-
-		if (i < weight)
-			ud->next = &updates[i];
+		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+			ud = &updates[i++];
+			ud->cpu = sibling;
+			ud->new_nid = new_nid;
+			ud->old_nid = numa_cpu_lookup_table[sibling];
+			cpumask_set_cpu(sibling, &updated_cpus);
+			if (i < weight)
+				ud->next = &updates[i];
+		}
+		cpu = cpu_last_thread_sibling(cpu);
 	}
 
 	stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index a3985ae..eeae308 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -484,7 +484,7 @@
 	 * use bit 63 of the event code for something else if they wish.
 	 */
 	return (ppmu->flags & PPMU_EBB) &&
-	       ((event->attr.config >> EVENT_CONFIG_EBB_SHIFT) & 1);
+	       ((event->attr.config >> PERF_EVENT_CONFIG_EBB_SHIFT) & 1);
 }
 
 static int ebb_event_check(struct perf_event *event)
@@ -1252,8 +1252,11 @@
 
 	ret = 0;
  out:
-	if (has_branch_stack(event))
+	if (has_branch_stack(event)) {
 		power_pmu_bhrb_enable(event);
+		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+					event->attr.branch_sample_type);
+	}
 
 	perf_pmu_enable(event->pmu);
 	local_irq_restore(flags);
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 96a64d6..2ee4a70 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -118,7 +118,7 @@
 	 (EVENT_UNIT_MASK      << EVENT_UNIT_SHIFT)		|	\
 	 (EVENT_COMBINE_MASK   << EVENT_COMBINE_SHIFT)		|	\
 	 (EVENT_MARKED_MASK    << EVENT_MARKED_SHIFT)		|	\
-	 (EVENT_EBB_MASK       << EVENT_CONFIG_EBB_SHIFT)	|	\
+	 (EVENT_EBB_MASK       << PERF_EVENT_CONFIG_EBB_SHIFT)	|	\
 	  EVENT_PSEL_MASK)
 
 /* MMCRA IFM bits - POWER8 */
@@ -233,10 +233,10 @@
 	pmc   = (event >> EVENT_PMC_SHIFT)        & EVENT_PMC_MASK;
 	unit  = (event >> EVENT_UNIT_SHIFT)       & EVENT_UNIT_MASK;
 	cache = (event >> EVENT_CACHE_SEL_SHIFT)  & EVENT_CACHE_SEL_MASK;
-	ebb   = (event >> EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK;
+	ebb   = (event >> PERF_EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK;
 
 	/* Clear the EBB bit in the event, so event checks work below */
-	event &= ~(EVENT_EBB_MASK << EVENT_CONFIG_EBB_SHIFT);
+	event &= ~(EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT);
 
 	if (pmc) {
 		if (pmc > 6)
@@ -561,18 +561,13 @@
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
 {
 	u64 pmu_bhrb_filter = 0;
-	u64 br_privilege = branch_sample_type & ONLY_PLM;
 
-	/* BHRB and regular PMU events share the same prvillege state
+	/* BHRB and regular PMU events share the same privilege state
 	 * filter configuration. BHRB is always recorded along with a
-	 * regular PMU event. So privilege state filter criteria for BHRB
-	 * and the companion PMU events has to be the same. As a default
-	 * "perf record" tool sets all privillege bits ON when no filter
-	 * criteria is provided in the command line. So as along as all
-	 * privillege bits are ON or they are OFF, we are good to go.
+	 * regular PMU event. As the privilege state filter is handled
+	 * in the basic PMC configuration of the accompanying regular
+	 * PMU event, we ignore any separate BHRB specific request.
 	 */
-	if ((br_privilege != 7) && (br_privilege != 0))
-		return -1;
 
 	/* No branch filter requested */
 	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
@@ -621,10 +616,19 @@
 
 static int __init init_power8_pmu(void)
 {
+	int rc;
+
 	if (!cur_cpu_spec->oprofile_cpu_type ||
 	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
 		return -ENODEV;
 
-	return register_power_pmu(&power8_pmu);
+	rc = register_power_pmu(&power8_pmu);
+	if (rc)
+		return rc;
+
+	/* Tell userspace that EBB is supported */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+	return 0;
 }
 early_initcall(init_power8_pmu);
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 969cce7..79663d2 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -114,7 +114,7 @@
 	 * the root bridge. So it's not reasonable to continue
 	 * the probing.
 	 */
-	if (!dn || !edev)
+	if (!dn || !edev || edev->pe)
 		return 0;
 
 	/* Skip for PCI-ISA bridge */
@@ -122,8 +122,19 @@
 		return 0;
 
 	/* Initialize eeh device */
-	edev->class_code	= dev->class;
-	edev->mode		= 0;
+	edev->class_code = dev->class;
+	edev->mode	&= 0xFFFFFF00;
+	if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		edev->mode |= EEH_DEV_BRIDGE;
+	if (pci_is_pcie(dev)) {
+		edev->pcie_cap = pci_pcie_cap(dev);
+
+		if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT)
+			edev->mode |= EEH_DEV_ROOT_PORT;
+		else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM)
+			edev->mode |= EEH_DEV_DS_PORT;
+	}
+
 	edev->config_addr	= ((dev->bus->number << 8) | dev->devfn);
 	edev->pe_config_addr	= phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 49b57b9..d8140b1 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1266,7 +1266,7 @@
 		opal_pci_set_pe(phb_id, 0, 0, 7, 1, 1 , OPAL_MAP_PE);
 }
 
-void pnv_pci_init_ioda2_phb(struct device_node *np)
+void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 {
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 1bd3399..62b4f80 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -19,7 +19,6 @@
 	select ZLIB_DEFLATE
 	select PPC_DOORBELL
 	select HAVE_CONTEXT_TRACKING
-	select HOTPLUG if SMP
 	select HOTPLUG_CPU if SMP
 	default y
 
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index b456b15..7fbc25b 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -133,6 +133,48 @@
 	return 0;
 }
 
+static int pseries_eeh_cap_start(struct device_node *dn)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	u32 status;
+
+	if (!pdn)
+		return 0;
+
+	rtas_read_config(pdn, PCI_STATUS, 2, &status);
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;
+
+	return PCI_CAPABILITY_LIST;
+}
+
+
+static int pseries_eeh_find_cap(struct device_node *dn, int cap)
+{
+	struct pci_dn *pdn = PCI_DN(dn);
+	int pos = pseries_eeh_cap_start(dn);
+	int cnt = 48;	/* Maximal number of capabilities */
+	u32 id;
+
+	if (!pos)
+		return 0;
+
+        while (cnt--) {
+		rtas_read_config(pdn, pos, 1, &pos);
+		if (pos < 0x40)
+			break;
+		pos &= ~3;
+		rtas_read_config(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
+		if (id == 0xff)
+			break;
+		if (id == cap)
+			return pos;
+		pos += PCI_CAP_LIST_NEXT;
+	}
+
+	return 0;
+}
+
 /**
  * pseries_eeh_of_probe - EEH probe on the given device
  * @dn: OF node
@@ -146,14 +188,16 @@
 {
 	struct eeh_dev *edev;
 	struct eeh_pe pe;
+	struct pci_dn *pdn = PCI_DN(dn);
 	const u32 *class_code, *vendor_id, *device_id;
 	const u32 *regs;
+	u32 pcie_flags;
 	int enable = 0;
 	int ret;
 
 	/* Retrieve OF node and eeh device */
 	edev = of_node_to_eeh_dev(dn);
-	if (!of_device_is_available(dn))
+	if (edev->pe || !of_device_is_available(dn))
 		return NULL;
 
 	/* Retrieve class/vendor/device IDs */
@@ -167,9 +211,26 @@
 	if (dn->type && !strcmp(dn->type, "isa"))
 		return NULL;
 
-	/* Update class code and mode of eeh device */
+	/*
+	 * Update class code and mode of eeh device. We need
+	 * correctly reflects that current device is root port
+	 * or PCIe switch downstream port.
+	 */
 	edev->class_code = *class_code;
-	edev->mode = 0;
+	edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP);
+	edev->mode &= 0xFFFFFF00;
+	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+		edev->mode |= EEH_DEV_BRIDGE;
+		if (edev->pcie_cap) {
+			rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
+					 2, &pcie_flags);
+			pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
+			if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
+				edev->mode |= EEH_DEV_ROOT_PORT;
+			else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
+				edev->mode |= EEH_DEV_DS_PORT;
+		}
+	}
 
 	/* Retrieve the device address */
 	regs = of_get_property(dn, "reg", NULL);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 02d6e21..8bad880 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -146,7 +146,7 @@
 	flags = 0;
 
 	/* Make pHyp happy */
-	if ((rflags & _PAGE_NO_CACHE) & !(rflags & _PAGE_WRITETHRU))
+	if ((rflags & _PAGE_NO_CACHE) && !(rflags & _PAGE_WRITETHRU))
 		hpte_r &= ~_PAGE_COHERENT;
 	if (firmware_has_feature(FW_FEATURE_XCMO) && !(hpte_r & HPTE_R_N))
 		flags |= H_COALESCE_CAND;
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 9f8671a..6a5f2b1 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -569,35 +569,6 @@
 	return ret;
 }
 
-static int unzip_oops(char *oops_buf, char *big_buf)
-{
-	struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
-	u64 timestamp = oops_hdr->timestamp;
-	char *big_oops_data = NULL;
-	char *oops_data_buf = NULL;
-	size_t big_oops_data_sz;
-	int unzipped_len;
-
-	big_oops_data = big_buf + sizeof(struct oops_log_info);
-	big_oops_data_sz = big_oops_buf_sz - sizeof(struct oops_log_info);
-	oops_data_buf = oops_buf + sizeof(struct oops_log_info);
-
-	unzipped_len = nvram_decompress(oops_data_buf, big_oops_data,
-					oops_hdr->report_length,
-					big_oops_data_sz);
-
-	if (unzipped_len < 0) {
-		pr_err("nvram: decompression failed; returned %d\n",
-								unzipped_len);
-		return -1;
-	}
-	oops_hdr = (struct oops_log_info *)big_buf;
-	oops_hdr->version = OOPS_HDR_VERSION;
-	oops_hdr->report_length = (u16) unzipped_len;
-	oops_hdr->timestamp = timestamp;
-	return 0;
-}
-
 static int nvram_pstore_open(struct pstore_info *psi)
 {
 	/* Reset the iterator to start reading partitions again */
@@ -685,10 +656,9 @@
 	unsigned int err_type, id_no, size = 0;
 	struct nvram_os_partition *part = NULL;
 	char *buff = NULL, *big_buff = NULL;
-	int rc, sig = 0;
+	int sig = 0;
 	loff_t p;
 
-read_partition:
 	read_type++;
 
 	switch (nvram_type_ids[read_type]) {
@@ -749,30 +719,46 @@
 		*id = id_no;
 
 	if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
+		int length, unzipped_len;
+		size_t hdr_size;
+
 		oops_hdr = (struct oops_log_info *)buff;
-		*buf = buff + sizeof(*oops_hdr);
+		if (oops_hdr->version < OOPS_HDR_VERSION) {
+			/* Old format oops header had 2-byte record size */
+			hdr_size = sizeof(u16);
+			length = oops_hdr->version;
+			time->tv_sec = 0;
+			time->tv_nsec = 0;
+		} else {
+			hdr_size = sizeof(*oops_hdr);
+			length = oops_hdr->report_length;
+			time->tv_sec = oops_hdr->timestamp;
+			time->tv_nsec = 0;
+		}
+		*buf = kmalloc(length, GFP_KERNEL);
+		if (*buf == NULL)
+			return -ENOMEM;
+		memcpy(*buf, buff + hdr_size, length);
+		kfree(buff);
 
 		if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) {
 			big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 			if (!big_buff)
 				return -ENOMEM;
 
-			rc = unzip_oops(buff, big_buff);
+			unzipped_len = nvram_decompress(*buf, big_buff,
+						length, big_oops_buf_sz);
 
-			if (rc != 0) {
-				kfree(buff);
+			if (unzipped_len < 0) {
+				pr_err("nvram: decompression failed, returned "
+					"rc %d\n", unzipped_len);
 				kfree(big_buff);
-				goto read_partition;
+			} else {
+				*buf = big_buff;
+				length = unzipped_len;
 			}
-
-			oops_hdr = (struct oops_log_info *)big_buff;
-			*buf = big_buff + sizeof(*oops_hdr);
-			kfree(buff);
 		}
-
-		time->tv_sec = oops_hdr->timestamp;
-		time->tv_nsec = 0;
-		return oops_hdr->report_length;
+		return length;
 	}
 
 	*buf = buff;
@@ -816,6 +802,7 @@
 static void __init nvram_init_oops_partition(int rtas_partition_exists)
 {
 	int rc;
+	size_t size;
 
 	rc = pseries_nvram_init_os_partition(&oops_log_partition);
 	if (rc != 0) {
@@ -844,8 +831,9 @@
 	big_oops_buf_sz = (oops_data_sz * 100) / 45;
 	big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
 	if (big_oops_buf) {
-		stream.workspace = kmalloc(zlib_deflate_workspacesize(
-				WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
+		size = max(zlib_deflate_workspacesize(WINDOW_BITS, MEM_LEVEL),
+			zlib_inflate_workspacesize());
+		stream.workspace = kmalloc(size, GFP_KERNEL);
 		if (!stream.workspace) {
 			pr_err("nvram: No memory for compression workspace; "
 				"skipping compression of %s partition data\n",
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 7b3cbde..721c058 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -287,6 +287,9 @@
 	unsigned long *savep;
 	struct rtas_error_log *h, *errhdr = NULL;
 
+	/* Mask top two bits */
+	regs->gpr[3] &= ~(0x3UL << 62);
+
 	if (!VALID_FWNMI_BUFFER(regs->gpr[3])) {
 		printk(KERN_ERR "FWNMI: corrupt r3 0x%016lx\n", regs->gpr[3]);
 		return NULL;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 22f75b5..8a4cae7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -118,6 +118,7 @@
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZ4
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_XZ
@@ -227,11 +228,12 @@
 	  not work on older machines.
 
 config MARCH_ZEC12
-	bool "IBM zEC12"
+	bool "IBM zBC12 and zEC12"
 	select HAVE_MARCH_ZEC12_FEATURES if 64BIT
 	help
-	  Select this to enable optimizations for IBM zEC12 (2827 series). The
-	  kernel will be slightly faster but will not work on older machines.
+	  Select this to enable optimizations for IBM zBC12 and zEC12 (2828 and
+	  2827 series). The kernel will be slightly faster but will not work on
+	  older machines.
 
 endchoice
 
@@ -709,6 +711,7 @@
 	def_bool y
 	prompt "s390 support for virtio devices"
 	depends on 64BIT
+	select TTY
 	select VIRTUALIZATION
 	select VIRTIO
 	select VIRTIO_CONSOLE
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 3ad8f61..866ecbe 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -6,9 +6,9 @@
 
 BITS := $(if $(CONFIG_64BIT),64,31)
 
-targets	:= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 \
-	   vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo misc.o piggy.o \
-	   sizes.h head$(BITS).o
+targets	:= vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2
+targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4
+targets += misc.o piggy.o sizes.h head$(BITS).o
 
 KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
@@ -48,6 +48,7 @@
 
 suffix-$(CONFIG_KERNEL_GZIP)  := gz
 suffix-$(CONFIG_KERNEL_BZIP2) := bz2
+suffix-$(CONFIG_KERNEL_LZ4)  := lz4
 suffix-$(CONFIG_KERNEL_LZMA)  := lzma
 suffix-$(CONFIG_KERNEL_LZO)  := lzo
 suffix-$(CONFIG_KERNEL_XZ)  := xz
@@ -56,6 +57,8 @@
 	$(call if_changed,gzip)
 $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
 	$(call if_changed,bzip2)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+	$(call if_changed,lz4)
 $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
 	$(call if_changed,lzma)
 $(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
diff --git a/arch/s390/boot/compressed/misc.c b/arch/s390/boot/compressed/misc.c
index c4c6a1c..57cbaff 100644
--- a/arch/s390/boot/compressed/misc.c
+++ b/arch/s390/boot/compressed/misc.c
@@ -47,6 +47,10 @@
 #include "../../../../lib/decompress_bunzip2.c"
 #endif
 
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
 #ifdef CONFIG_KERNEL_LZMA
 #include "../../../../lib/decompress_unlzma.c"
 #endif
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index 4d8604e..7d46767 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -693,7 +693,7 @@
 	size -= offset;
 	p = addr + offset / BITS_PER_LONG;
 	if (bit) {
-		set = __flo_word(0, *p & (~0UL << bit));
+		set = __flo_word(0, *p & (~0UL >> bit));
 		if (set >= size)
 			return size + offset;
 		if (set < BITS_PER_LONG)
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b75d7d6..6d6d92b 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -32,6 +32,7 @@
 	struct mm_struct *mm;
 	struct mmu_table_batch *batch;
 	unsigned int fullmm;
+	unsigned long start, end;
 };
 
 struct mmu_table_batch {
@@ -48,10 +49,13 @@
 
 static inline void tlb_gather_mmu(struct mmu_gather *tlb,
 				  struct mm_struct *mm,
-				  unsigned int full_mm_flush)
+				  unsigned long start,
+				  unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 	tlb->batch = NULL;
 	if (tlb->fullmm)
 		__tlb_flush_mm(mm);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index a6fc037..500aa10 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -52,12 +52,13 @@
 
 static bool is_in_guest(struct pt_regs *regs)
 {
-	unsigned long ip = instruction_pointer(regs);
-
 	if (user_mode(regs))
 		return false;
-
-	return ip == (unsigned long) &sie_exit;
+#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
+	return instruction_pointer(regs) == (unsigned long) &sie_exit;
+#else
+	return false;
+#endif
 }
 
 static unsigned long guest_is_user_mode(struct pt_regs *regs)
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 497451e..aeed8a6 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -994,6 +994,7 @@
 		strcpy(elf_platform, "z196");
 		break;
 	case 0x2827:
+	case 0x2828:
 		strcpy(elf_platform, "zEC12");
 		break;
 	}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index ba694d2..34c1c9a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -702,14 +702,25 @@
 		return rc;
 
 	vcpu->arch.sie_block->icptcode = 0;
-	preempt_disable();
-	kvm_guest_enter();
-	preempt_enable();
 	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
 		   atomic_read(&vcpu->arch.sie_block->cpuflags));
 	trace_kvm_s390_sie_enter(vcpu,
 				 atomic_read(&vcpu->arch.sie_block->cpuflags));
+
+	/*
+	 * As PF_VCPU will be used in fault handler, between guest_enter
+	 * and guest_exit should be no uaccess.
+	 */
+	preempt_disable();
+	kvm_guest_enter();
+	preempt_enable();
 	rc = sie64a(vcpu->arch.sie_block, vcpu->run->s.regs.gprs);
+	kvm_guest_exit();
+
+	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+		   vcpu->arch.sie_block->icptcode);
+	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
+
 	if (rc > 0)
 		rc = 0;
 	if (rc < 0) {
@@ -721,10 +732,6 @@
 			rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 		}
 	}
-	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
-		   vcpu->arch.sie_block->icptcode);
-	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
-	kvm_guest_exit();
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
 	return rc;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 0da3e6e..4cdc54e 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/compat.h>
 #include <asm/asm-offsets.h>
+#include <asm/facility.h>
 #include <asm/current.h>
 #include <asm/debug.h>
 #include <asm/ebcdic.h>
@@ -532,8 +533,7 @@
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* Only provide non-quiescing support if the host supports it */
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ &&
-	    S390_lowcore.stfl_fac_list & 0x00020000)
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_NQ && !test_facility(14))
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* No support for conditional-SSKE */
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index ce36ea8..ad446b0 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -69,6 +69,7 @@
 		order = 2;
 		break;
 	case 0x2827:	/* zEC12 */
+	case 0x2828:	/* zEC12 */
 	default:
 		order = 5;
 		break;
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index ffeb17c..930783d 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -440,7 +440,7 @@
 		switch (id.machine) {
 		case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
 		case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
-		case 0x2827:              ops->cpu_type = "s390/zEC12"; break;
+		case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
 		default: return -ENODEV;
 		}
 	}
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index c8def8b..5fc2375 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -87,6 +87,8 @@
 
 source "init/Kconfig"
 
+source "kernel/Kconfig.freezer"
+
 config MMU
 	def_bool y
 
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index 2051821..0cf4097 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -22,7 +22,7 @@
 CONFIG_CMDLINE_OVERWRITE=y
 CONFIG_CMDLINE="console=ttySC1,115200 mem=64M root=/dev/nfs"
 CONFIG_PCI=y
-CONFIG_HOTPLUG_PCI=m
+CONFIG_HOTPLUG_PCI=y
 CONFIG_BINFMT_MISC=y
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index e61d43d..362192e 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -36,10 +36,12 @@
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 
 	init_tlb_gather(tlb);
 }
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 4febacd..29b0301 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -45,10 +45,12 @@
 }
 
 static inline void
-tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush)
+tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
-	tlb->fullmm = full_mm_flush;
+	tlb->start = start;
+	tlb->end = end;
+	tlb->fullmm = !(start | (end+1));
 
 	init_tlb_gather(tlb);
 }
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index d606463..b7388a4 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -225,7 +225,7 @@
 	unsigned long nr_pages;
 
 	nr_pages = round_up(size, EFI_PAGE_SIZE) / EFI_PAGE_SIZE;
-	efi_call_phys2(sys_table->boottime->free_pages, addr, size);
+	efi_call_phys2(sys_table->boottime->free_pages, addr, nr_pages);
 }
 
 static void find_bits(unsigned long mask, u8 *pos, u8 *size)
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 7d6ba9d..6c63c35 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -27,7 +27,6 @@
 obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
 obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
 obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
 
 # These modules require assembler to support AVX.
 ifeq ($(avx_supported),yes)
@@ -82,4 +81,3 @@
 crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
 sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
 sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
-crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
deleted file mode 100644
index 35e9756..0000000
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ /dev/null
@@ -1,643 +0,0 @@
-########################################################################
-# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
-#
-# Copyright (c) 2013, Intel Corporation
-#
-# Authors:
-#     Erdinc Ozturk <erdinc.ozturk@intel.com>
-#     Vinodh Gopal <vinodh.gopal@intel.com>
-#     James Guilford <james.guilford@intel.com>
-#     Tim Chen <tim.c.chen@linux.intel.com>
-#
-# This software is available to you under a choice of one of two
-# licenses.  You may choose to be licensed under the terms of the GNU
-# General Public License (GPL) Version 2, available from the file
-# COPYING in the main directory of this source tree, or the
-# OpenIB.org BSD license below:
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in the
-#   documentation and/or other materials provided with the
-#   distribution.
-#
-# * Neither the name of the Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-#
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-########################################################################
-#       Function API:
-#       UINT16 crc_t10dif_pcl(
-#               UINT16 init_crc, //initial CRC value, 16 bits
-#               const unsigned char *buf, //buffer pointer to calculate CRC on
-#               UINT64 len //buffer length in bytes (64-bit data)
-#       );
-#
-#       Reference paper titled "Fast CRC Computation for Generic
-#	Polynomials Using PCLMULQDQ Instruction"
-#       URL: http://www.intel.com/content/dam/www/public/us/en/documents
-#  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-#
-#
-
-#include <linux/linkage.h>
-
-.text
-
-#define        arg1 %rdi
-#define        arg2 %rsi
-#define        arg3 %rdx
-
-#define        arg1_low32 %edi
-
-ENTRY(crc_t10dif_pcl)
-.align 16
-
-	# adjust the 16-bit initial_crc value, scale it to 32 bits
-	shl	$16, arg1_low32
-
-	# Allocate Stack Space
-	mov     %rsp, %rcx
-	sub	$16*2, %rsp
-	# align stack to 16 byte boundary
-	and     $~(0x10 - 1), %rsp
-
-	# check if smaller than 256
-	cmp	$256, arg3
-
-	# for sizes less than 128, we can't fold 64B at a time...
-	jl	_less_than_128
-
-
-	# load the initial crc value
-	movd	arg1_low32, %xmm10	# initial crc
-
-	# crc value does not need to be byte-reflected, but it needs
-	# to be moved to the high part of the register.
-	# because data will be byte-reflected and will align with
-	# initial crc at correct place.
-	pslldq	$12, %xmm10
-
-	movdqa  SHUF_MASK(%rip), %xmm11
-	# receive the initial 64B data, xor the initial crc value
-	movdqu	16*0(arg2), %xmm0
-	movdqu	16*1(arg2), %xmm1
-	movdqu	16*2(arg2), %xmm2
-	movdqu	16*3(arg2), %xmm3
-	movdqu	16*4(arg2), %xmm4
-	movdqu	16*5(arg2), %xmm5
-	movdqu	16*6(arg2), %xmm6
-	movdqu	16*7(arg2), %xmm7
-
-	pshufb	%xmm11, %xmm0
-	# XOR the initial_crc value
-	pxor	%xmm10, %xmm0
-	pshufb	%xmm11, %xmm1
-	pshufb	%xmm11, %xmm2
-	pshufb	%xmm11, %xmm3
-	pshufb	%xmm11, %xmm4
-	pshufb	%xmm11, %xmm5
-	pshufb	%xmm11, %xmm6
-	pshufb	%xmm11, %xmm7
-
-	movdqa	rk3(%rip), %xmm10	#xmm10 has rk3 and rk4
-					#imm value of pclmulqdq instruction
-					#will determine which constant to use
-
-	#################################################################
-	# we subtract 256 instead of 128 to save one instruction from the loop
-	sub	$256, arg3
-
-	# at this section of the code, there is 64*x+y (0<=y<64) bytes of
-	# buffer. The _fold_64_B_loop will fold 64B at a time
-	# until we have 64+y Bytes of buffer
-
-
-	# fold 64B at a time. This section of the code folds 4 xmm
-	# registers in parallel
-_fold_64_B_loop:
-
-	# update the buffer pointer
-	add	$128, arg2		#    buf += 64#
-
-	movdqu	16*0(arg2), %xmm9
-	movdqu	16*1(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm0, %xmm8
-	movdqa	%xmm1, %xmm13
-	pclmulqdq	$0x0 , %xmm10, %xmm0
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0 , %xmm10, %xmm1
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm0
-	xorps	%xmm8 , %xmm0
-	pxor	%xmm12, %xmm1
-	xorps	%xmm13, %xmm1
-
-	movdqu	16*2(arg2), %xmm9
-	movdqu	16*3(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm2, %xmm8
-	movdqa	%xmm3, %xmm13
-	pclmulqdq	$0x0, %xmm10, %xmm2
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0, %xmm10, %xmm3
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm2
-	xorps	%xmm8 , %xmm2
-	pxor	%xmm12, %xmm3
-	xorps	%xmm13, %xmm3
-
-	movdqu	16*4(arg2), %xmm9
-	movdqu	16*5(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm4, %xmm8
-	movdqa	%xmm5, %xmm13
-	pclmulqdq	$0x0,  %xmm10, %xmm4
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0,  %xmm10, %xmm5
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 ,  %xmm4
-	xorps	%xmm8 ,  %xmm4
-	pxor	%xmm12,  %xmm5
-	xorps	%xmm13,  %xmm5
-
-	movdqu	16*6(arg2), %xmm9
-	movdqu	16*7(arg2), %xmm12
-	pshufb	%xmm11, %xmm9
-	pshufb	%xmm11, %xmm12
-	movdqa	%xmm6 , %xmm8
-	movdqa	%xmm7 , %xmm13
-	pclmulqdq	$0x0 , %xmm10, %xmm6
-	pclmulqdq	$0x11, %xmm10, %xmm8
-	pclmulqdq	$0x0 , %xmm10, %xmm7
-	pclmulqdq	$0x11, %xmm10, %xmm13
-	pxor	%xmm9 , %xmm6
-	xorps	%xmm8 , %xmm6
-	pxor	%xmm12, %xmm7
-	xorps	%xmm13, %xmm7
-
-	sub	$128, arg3
-
-	# check if there is another 64B in the buffer to be able to fold
-	jge	_fold_64_B_loop
-	##################################################################
-
-
-	add	$128, arg2
-	# at this point, the buffer pointer is pointing at the last y Bytes
-	# of the buffer the 64B of folded data is in 4 of the xmm
-	# registers: xmm0, xmm1, xmm2, xmm3
-
-
-	# fold the 8 xmm registers to 1 xmm register with different constants
-
-	movdqa	rk9(%rip), %xmm10
-	movdqa	%xmm0, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm0
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm0, %xmm7
-
-	movdqa	rk11(%rip), %xmm10
-	movdqa	%xmm1, %xmm8
-	pclmulqdq	 $0x11, %xmm10, %xmm1
-	pclmulqdq	 $0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm1, %xmm7
-
-	movdqa	rk13(%rip), %xmm10
-	movdqa	%xmm2, %xmm8
-	pclmulqdq	 $0x11, %xmm10, %xmm2
-	pclmulqdq	 $0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm2, %xmm7
-
-	movdqa	rk15(%rip), %xmm10
-	movdqa	%xmm3, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm3
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm3, %xmm7
-
-	movdqa	rk17(%rip), %xmm10
-	movdqa	%xmm4, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm4
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm4, %xmm7
-
-	movdqa	rk19(%rip), %xmm10
-	movdqa	%xmm5, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm5
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	xorps	%xmm5, %xmm7
-
-	movdqa	rk1(%rip), %xmm10	#xmm10 has rk1 and rk2
-					#imm value of pclmulqdq instruction
-					#will determine which constant to use
-	movdqa	%xmm6, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm6
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm6, %xmm7
-
-
-	# instead of 64, we add 48 to the loop counter to save 1 instruction
-	# from the loop instead of a cmp instruction, we use the negative
-	# flag with the jl instruction
-	add	$128-16, arg3
-	jl	_final_reduction_for_128
-
-	# now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
-	# and the rest is in memory. We can fold 16 bytes at a time if y>=16
-	# continue folding 16B at a time
-
-_16B_reduction_loop:
-	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm7
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	movdqu	(arg2), %xmm0
-	pshufb	%xmm11, %xmm0
-	pxor	%xmm0 , %xmm7
-	add	$16, arg2
-	sub	$16, arg3
-	# instead of a cmp instruction, we utilize the flags with the
-	# jge instruction equivalent of: cmp arg3, 16-16
-	# check if there is any more 16B in the buffer to be able to fold
-	jge	_16B_reduction_loop
-
-	#now we have 16+z bytes left to reduce, where 0<= z < 16.
-	#first, we reduce the data in the xmm7 register
-
-
-_final_reduction_for_128:
-	# check if any more data to fold. If not, compute the CRC of
-	# the final 128 bits
-	add	$16, arg3
-	je	_128_done
-
-	# here we are getting data that is less than 16 bytes.
-	# since we know that there was data before the pointer, we can
-	# offset the input pointer before the actual point, to receive
-	# exactly 16 bytes. after that the registers need to be adjusted.
-_get_last_two_xmms:
-	movdqa	%xmm7, %xmm2
-
-	movdqu	-16(arg2, arg3), %xmm1
-	pshufb	%xmm11, %xmm1
-
-	# get rid of the extra data that was loaded before
-	# load the shift constant
-	lea	pshufb_shf_table+16(%rip), %rax
-	sub	arg3, %rax
-	movdqu	(%rax), %xmm0
-
-	# shift xmm2 to the left by arg3 bytes
-	pshufb	%xmm0, %xmm2
-
-	# shift xmm7 to the right by 16-arg3 bytes
-	pxor	mask1(%rip), %xmm0
-	pshufb	%xmm0, %xmm7
-	pblendvb	%xmm2, %xmm1	#xmm0 is implicit
-
-	# fold 16 Bytes
-	movdqa	%xmm1, %xmm2
-	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, %xmm10, %xmm7
-	pclmulqdq	$0x0 , %xmm10, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm2, %xmm7
-
-_128_done:
-	# compute crc of a 128-bit value
-	movdqa	rk5(%rip), %xmm10	# rk5 and rk6 in xmm10
-	movdqa	%xmm7, %xmm0
-
-	#64b fold
-	pclmulqdq	$0x1, %xmm10, %xmm7
-	pslldq	$8   ,  %xmm0
-	pxor	%xmm0,  %xmm7
-
-	#32b fold
-	movdqa	%xmm7, %xmm0
-
-	pand	mask2(%rip), %xmm0
-
-	psrldq	$12, %xmm7
-	pclmulqdq	$0x10, %xmm10, %xmm7
-	pxor	%xmm0, %xmm7
-
-	#barrett reduction
-_barrett:
-	movdqa	rk7(%rip), %xmm10	# rk7 and rk8 in xmm10
-	movdqa	%xmm7, %xmm0
-	pclmulqdq	$0x01, %xmm10, %xmm7
-	pslldq	$4, %xmm7
-	pclmulqdq	$0x11, %xmm10, %xmm7
-
-	pslldq	$4, %xmm7
-	pxor	%xmm0, %xmm7
-	pextrd	$1, %xmm7, %eax
-
-_cleanup:
-	# scale the result back to 16 bits
-	shr	$16, %eax
-	mov     %rcx, %rsp
-	ret
-
-########################################################################
-
-.align 16
-_less_than_128:
-
-	# check if there is enough buffer to be able to fold 16B at a time
-	cmp	$32, arg3
-	jl	_less_than_32
-	movdqa  SHUF_MASK(%rip), %xmm11
-
-	# now if there is, load the constants
-	movdqa	rk1(%rip), %xmm10	# rk1 and rk2 in xmm10
-
-	movd	arg1_low32, %xmm0	# get the initial crc value
-	pslldq	$12, %xmm0	# align it to its correct place
-	movdqu	(arg2), %xmm7	# load the plaintext
-	pshufb	%xmm11, %xmm7	# byte-reflect the plaintext
-	pxor	%xmm0, %xmm7
-
-
-	# update the buffer pointer
-	add	$16, arg2
-
-	# update the counter. subtract 32 instead of 16 to save one
-	# instruction from the loop
-	sub	$32, arg3
-
-	jmp	_16B_reduction_loop
-
-
-.align 16
-_less_than_32:
-	# mov initial crc to the return value. this is necessary for
-	# zero-length buffers.
-	mov	arg1_low32, %eax
-	test	arg3, arg3
-	je	_cleanup
-
-	movdqa  SHUF_MASK(%rip), %xmm11
-
-	movd	arg1_low32, %xmm0	# get the initial crc value
-	pslldq	$12, %xmm0	# align it to its correct place
-
-	cmp	$16, arg3
-	je	_exact_16_left
-	jl	_less_than_16_left
-
-	movdqu	(arg2), %xmm7	# load the plaintext
-	pshufb	%xmm11, %xmm7	# byte-reflect the plaintext
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-	add	$16, arg2
-	sub	$16, arg3
-	movdqa	rk1(%rip), %xmm10	# rk1 and rk2 in xmm10
-	jmp	_get_last_two_xmms
-
-
-.align 16
-_less_than_16_left:
-	# use stack space to load data less than 16 bytes, zero-out
-	# the 16B in memory first.
-
-	pxor	%xmm1, %xmm1
-	mov	%rsp, %r11
-	movdqa	%xmm1, (%r11)
-
-	cmp	$4, arg3
-	jl	_only_less_than_4
-
-	# backup the counter value
-	mov	arg3, %r9
-	cmp	$8, arg3
-	jl	_less_than_8_left
-
-	# load 8 Bytes
-	mov	(arg2), %rax
-	mov	%rax, (%r11)
-	add	$8, %r11
-	sub	$8, arg3
-	add	$8, arg2
-_less_than_8_left:
-
-	cmp	$4, arg3
-	jl	_less_than_4_left
-
-	# load 4 Bytes
-	mov	(arg2), %eax
-	mov	%eax, (%r11)
-	add	$4, %r11
-	sub	$4, arg3
-	add	$4, arg2
-_less_than_4_left:
-
-	cmp	$2, arg3
-	jl	_less_than_2_left
-
-	# load 2 Bytes
-	mov	(arg2), %ax
-	mov	%ax, (%r11)
-	add	$2, %r11
-	sub	$2, arg3
-	add	$2, arg2
-_less_than_2_left:
-	cmp     $1, arg3
-        jl      _zero_left
-
-	# load 1 Byte
-	mov	(arg2), %al
-	mov	%al, (%r11)
-_zero_left:
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7	# xor the initial crc value
-
-	# shl r9, 4
-	lea	pshufb_shf_table+16(%rip), %rax
-	sub	%r9, %rax
-	movdqu	(%rax), %xmm0
-	pxor	mask1(%rip), %xmm0
-
-	pshufb	%xmm0, %xmm7
-	jmp	_128_done
-
-.align 16
-_exact_16_left:
-	movdqu	(arg2), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	jmp	_128_done
-
-_only_less_than_4:
-	cmp	$3, arg3
-	jl	_only_less_than_3
-
-	# load 3 Bytes
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	mov	1(arg2), %al
-	mov	%al, 1(%r11)
-
-	mov	2(arg2), %al
-	mov	%al, 2(%r11)
-
-	movdqa	 (%rsp), %xmm7
-	pshufb	 %xmm11, %xmm7
-	pxor	 %xmm0 , %xmm7  # xor the initial crc value
-
-	psrldq	$5, %xmm7
-
-	jmp	_barrett
-_only_less_than_3:
-	cmp	$2, arg3
-	jl	_only_less_than_2
-
-	# load 2 Bytes
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	mov	1(arg2), %al
-	mov	%al, 1(%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	psrldq	$6, %xmm7
-
-	jmp	_barrett
-_only_less_than_2:
-
-	# load 1 Byte
-	mov	(arg2), %al
-	mov	%al, (%r11)
-
-	movdqa	(%rsp), %xmm7
-	pshufb	%xmm11, %xmm7
-	pxor	%xmm0 , %xmm7   # xor the initial crc value
-
-	psrldq	$7, %xmm7
-
-	jmp	_barrett
-
-ENDPROC(crc_t10dif_pcl)
-
-.data
-
-# precomputed constants
-# these constants are precomputed from the poly:
-# 0x8bb70000 (0x8bb7 scaled to 32 bits)
-.align 16
-# Q = 0x18BB70000
-# rk1 = 2^(32*3) mod Q << 32
-# rk2 = 2^(32*5) mod Q << 32
-# rk3 = 2^(32*15) mod Q << 32
-# rk4 = 2^(32*17) mod Q << 32
-# rk5 = 2^(32*3) mod Q << 32
-# rk6 = 2^(32*2) mod Q << 32
-# rk7 = floor(2^64/Q)
-# rk8 = Q
-rk1:
-.quad 0x2d56000000000000
-rk2:
-.quad 0x06df000000000000
-rk3:
-.quad 0x9d9d000000000000
-rk4:
-.quad 0x7cf5000000000000
-rk5:
-.quad 0x2d56000000000000
-rk6:
-.quad 0x1368000000000000
-rk7:
-.quad 0x00000001f65a57f8
-rk8:
-.quad 0x000000018bb70000
-
-rk9:
-.quad 0xceae000000000000
-rk10:
-.quad 0xbfd6000000000000
-rk11:
-.quad 0x1e16000000000000
-rk12:
-.quad 0x713c000000000000
-rk13:
-.quad 0xf7f9000000000000
-rk14:
-.quad 0x80a6000000000000
-rk15:
-.quad 0x044c000000000000
-rk16:
-.quad 0xe658000000000000
-rk17:
-.quad 0xad18000000000000
-rk18:
-.quad 0xa497000000000000
-rk19:
-.quad 0x6ee3000000000000
-rk20:
-.quad 0xe7b5000000000000
-
-
-
-mask1:
-.octa 0x80808080808080808080808080808080
-mask2:
-.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
-
-SHUF_MASK:
-.octa 0x000102030405060708090A0B0C0D0E0F
-
-pshufb_shf_table:
-# use these values for shift constants for the pshufb instruction
-# different alignments result in values as shown:
-#	DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
-#	DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
-#	DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
-#	DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
-#	DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
-#	DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
-#	DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
-#	DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
-#	DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
-#	DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
-#	DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
-#	DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
-#	DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
-#	DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
-#	DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
-.octa 0x8f8e8d8c8b8a89888786858483828100
-.octa 0x000e0d0c0b0a09080706050403020100
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
deleted file mode 100644
index 7845d7f..0000000
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
- *
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <asm/i387.h>
-#include <asm/cpufeature.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
-				size_t len);
-
-struct chksum_desc_ctx {
-	__u16 crc;
-};
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
-		kernel_fpu_end();
-	} else
-		ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-			u8 *out)
-{
-	if (irq_fpu_usable()) {
-		kernel_fpu_begin();
-		*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
-		kernel_fpu_end();
-	} else
-		*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize		=	CRC_T10DIF_DIGEST_SIZE,
-	.init		=	chksum_init,
-	.update		=	chksum_update,
-	.final		=	chksum_final,
-	.finup		=	chksum_finup,
-	.digest		=	chksum_digest,
-	.descsize		=	sizeof(struct chksum_desc_ctx),
-	.base			=	{
-		.cra_name		=	"crct10dif",
-		.cra_driver_name	=	"crct10dif-pclmul",
-		.cra_priority		=	200,
-		.cra_blocksize		=	CRC_T10DIF_BLOCK_SIZE,
-		.cra_module		=	THIS_MODULE,
-	}
-};
-
-static const struct x86_cpu_id crct10dif_cpu_id[] = {
-	X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
-	{}
-};
-MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
-
-static int __init crct10dif_intel_mod_init(void)
-{
-	if (!x86_match_cpu(crct10dif_cpu_id))
-		return -ENODEV;
-
-	return crypto_register_shash(&alg);
-}
-
-static void __exit crct10dif_intel_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_intel_mod_init);
-module_exit(crct10dif_intel_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS("crct10dif");
-MODULE_ALIAS("crct10dif-pclmul");
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 653668d..4a8cb8d 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -35,9 +35,9 @@
 	 */
 	if (boot_params->sentinel) {
 		/* fields in boot_params are left uninitialized, clear them */
-		memset(&boot_params->olpc_ofw_header, 0,
+		memset(&boot_params->ext_ramdisk_image, 0,
 		       (char *)&boot_params->efi_info -
-			(char *)&boot_params->olpc_ofw_header);
+			(char *)&boot_params->ext_ramdisk_image);
 		memset(&boot_params->kbd_status, 0,
 		       (char *)&boot_params->hdr -
 		       (char *)&boot_params->kbd_status);
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index 50e5c58..4c01917 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -59,7 +59,7 @@
 
 extern int __apply_microcode_amd(struct microcode_amd *mc_amd);
 extern int apply_microcode_amd(int cpu);
-extern enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size);
+extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
 
 #ifdef CONFIG_MICROCODE_AMD_EARLY
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index f2b489c..3bf2dd0 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -55,9 +55,53 @@
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
 #endif
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+
+/*
+ * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and
+ * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset
+ * into this range.
+ */
+#define PTE_FILE_MAX_BITS	28
+#define PTE_FILE_SHIFT1		(_PAGE_BIT_PRESENT + 1)
+#define PTE_FILE_SHIFT2		(_PAGE_BIT_FILE + 1)
+#define PTE_FILE_SHIFT3		(_PAGE_BIT_PROTNONE + 1)
+#define PTE_FILE_SHIFT4		(_PAGE_BIT_SOFT_DIRTY + 1)
+#define PTE_FILE_BITS1		(PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
+#define PTE_FILE_BITS2		(PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
+#define PTE_FILE_BITS3		(PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
+
+#define pte_to_pgoff(pte)						\
+	((((pte).pte_low >> (PTE_FILE_SHIFT1))				\
+	  & ((1U << PTE_FILE_BITS1) - 1)))				\
+	+ ((((pte).pte_low >> (PTE_FILE_SHIFT2))			\
+	    & ((1U << PTE_FILE_BITS2) - 1))				\
+	   << (PTE_FILE_BITS1))						\
+	+ ((((pte).pte_low >> (PTE_FILE_SHIFT3))			\
+	    & ((1U << PTE_FILE_BITS3) - 1))				\
+	   << (PTE_FILE_BITS1 + PTE_FILE_BITS2))			\
+	+ ((((pte).pte_low >> (PTE_FILE_SHIFT4)))			\
+	    << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
+
+#define pgoff_to_pte(off)						\
+	((pte_t) { .pte_low =						\
+	 ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1)	\
+	 + ((((off) >> PTE_FILE_BITS1)					\
+	     & ((1U << PTE_FILE_BITS2) - 1))				\
+	    << PTE_FILE_SHIFT2)						\
+	 + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2))		\
+	     & ((1U << PTE_FILE_BITS3) - 1))				\
+	    << PTE_FILE_SHIFT3)						\
+	 + ((((off) >>							\
+	      (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)))	\
+	    << PTE_FILE_SHIFT4)						\
+	 + _PAGE_FILE })
+
+#else /* CONFIG_MEM_SOFT_DIRTY */
+
 /*
  * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
- * split up the 29 bits of offset into this range:
+ * split up the 29 bits of offset into this range.
  */
 #define PTE_FILE_MAX_BITS	29
 #define PTE_FILE_SHIFT1		(_PAGE_BIT_PRESENT + 1)
@@ -88,6 +132,8 @@
 	    << PTE_FILE_SHIFT3)						\
 	 + _PAGE_FILE })
 
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 4cc9f2b..81bb91b 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -179,6 +179,9 @@
 /*
  * Bits 0, 6 and 7 are taken in the low part of the pte,
  * put the 32 bits of offset into the high part.
+ *
+ * For soft-dirty tracking 11 bit is taken from
+ * the low part of pte as well.
  */
 #define pte_to_pgoff(pte) ((pte).pte_high)
 #define pgoff_to_pte(off)						\
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7dc305a..1c00631 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -314,6 +314,36 @@
 	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline int pte_swp_soft_dirty(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_file_mksoft_dirty(pte_t pte)
+{
+	return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline int pte_file_soft_dirty(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_SOFT_DIRTY;
+}
+
 /*
  * Mask out unsupported bits in a present pgprot.  Non-present pgprots
  * can use those bits for other purposes, so leave them be.
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index c98ac63..f4843e0 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -61,12 +61,27 @@
  * they do not conflict with each other.
  */
 
+#define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_HIDDEN
+
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
 #else
 #define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 0))
 #endif
 
+/*
+ * Tracking soft dirty bit when a page goes to a swap is tricky.
+ * We need a bit which can be stored in pte _and_ not conflict
+ * with swap entry format. On x86 bits 6 and 7 are *not* involved
+ * into swap entry computation, but bit 6 is used for nonlinear
+ * file mapping, so we borrow bit 7 for soft dirty tracking.
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY	_PAGE_PSE
+#else
+#define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 33692ea..e3ddd7d 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -233,8 +233,4 @@
 #define arch_read_relax(lock)	cpu_relax()
 #define arch_write_relax(lock)	cpu_relax()
 
-/* The {read|write|spin}_lock() on x86 are full memory barriers. */
-static inline void smp_mb__after_lock(void) { }
-#define ARCH_HAS_SMP_MB_AFTER_LOCK
-
 #endif /* _ASM_X86_SPINLOCK_H */
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f654ece..08a0890 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -512,7 +512,7 @@
 
 static const int amd_erratum_383[];
 static const int amd_erratum_400[];
-static bool cpu_has_amd_erratum(const int *erratum);
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
 
 static void init_amd(struct cpuinfo_x86 *c)
 {
@@ -729,11 +729,11 @@
 		value &= ~(1ULL << 24);
 		wrmsrl_safe(MSR_AMD64_BU_CFG2, value);
 
-		if (cpu_has_amd_erratum(amd_erratum_383))
+		if (cpu_has_amd_erratum(c, amd_erratum_383))
 			set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH);
 	}
 
-	if (cpu_has_amd_erratum(amd_erratum_400))
+	if (cpu_has_amd_erratum(c, amd_erratum_400))
 		set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
 
 	rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
@@ -878,23 +878,13 @@
 static const int amd_erratum_383[] =
 	AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
 
-static bool cpu_has_amd_erratum(const int *erratum)
+
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
 {
-	struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
 	int osvw_id = *erratum++;
 	u32 range;
 	u32 ms;
 
-	/*
-	 * If called early enough that current_cpu_data hasn't been initialized
-	 * yet, fall back to boot_cpu_data.
-	 */
-	if (cpu->x86 == 0)
-		cpu = &boot_cpu_data;
-
-	if (cpu->x86_vendor != X86_VENDOR_AMD)
-		return false;
-
 	if (osvw_id >= 0 && osvw_id < 65536 &&
 	    cpu_has(cpu, X86_FEATURE_OSVW)) {
 		u64 osvw_len;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index e2703520..c370e1c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -111,8 +111,8 @@
 #ifdef	CONFIG_MEMORY_FAILURE
 	MCESEV(
 		KEEP, "Action required but unaffected thread is continuable",
-		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR),
-		MCGMASK(MCG_STATUS_RIPV, MCG_STATUS_RIPV)
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR, MCI_UC_SAR|MCI_ADDR),
+		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, MCG_STATUS_RIPV)
 		),
 	MCESEV(
 		AR, "Action required: data load error in a user process",
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index fbc9210..a45d8d4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2270,6 +2270,7 @@
 	case 70:
 	case 71:
 	case 63:
+	case 69:
 		x86_pmu.late_ack = true;
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index cad791d..1fb6c72 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -314,8 +314,8 @@
 static struct uncore_event_desc snbep_uncore_qpi_events[] = {
 	INTEL_UNCORE_EVENT_DESC(clockticks,       "event=0x14"),
 	INTEL_UNCORE_EVENT_DESC(txl_flits_active, "event=0x00,umask=0x06"),
-	INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x02,umask=0x08"),
-	INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x03,umask=0x04"),
+	INTEL_UNCORE_EVENT_DESC(drs_data,         "event=0x102,umask=0x08"),
+	INTEL_UNCORE_EVENT_DESC(ncb_data,         "event=0x103,umask=0x04"),
 	{ /* end: all zeroes */ },
 };
 
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 94ab6b9..63bdb29 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -196,15 +196,23 @@
 static void __init intel_remapping_check(int num, int slot, int func)
 {
 	u8 revision;
+	u16 device;
 
+	device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
 	revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID);
 
 	/*
-	 * Revision 0x13 of this chipset supports irq remapping
-	 * but has an erratum that breaks its behavior, flag it as such
+ 	 * Revision 13 of all triggering devices id in this quirk have
+	 * a problem draining interrupts when irq remapping is enabled,
+	 * and should be flagged as broken.  Additionally revisions 0x12
+	 * and 0x22 of device id 0x3405 has this problem.
 	 */
 	if (revision == 0x13)
 		set_irq_remapping_broken();
+	else if ((device == 0x3405) &&
+	    ((revision == 0x12) ||
+	     (revision == 0x22)))
+		set_irq_remapping_broken();
 
 }
 
@@ -239,6 +247,8 @@
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
 	{ PCI_VENDOR_ID_INTEL, 0x3403, PCI_CLASS_BRIDGE_HOST,
 	  PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
+	{ PCI_VENDOR_ID_INTEL, 0x3405, PCI_CLASS_BRIDGE_HOST,
+	  PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
 	{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
 	  PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
 	{}
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 202d24f..5d576ab 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -116,7 +116,7 @@
 
 	if (cpu_has_fxsr) {
 		memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct));
-		asm volatile("fxsave %0" : : "m" (fx_scratch));
+		asm volatile("fxsave %0" : "+m" (fx_scratch));
 		mask = fx_scratch.mxcsr_mask;
 		if (mask == 0)
 			mask = 0x0000ffbf;
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 47ebb1d..7123b5d 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -145,10 +145,9 @@
 	return 0;
 }
 
-static unsigned int verify_patch_size(int cpu, u32 patch_size,
+static unsigned int verify_patch_size(u8 family, u32 patch_size,
 				      unsigned int size)
 {
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	u32 max_size;
 
 #define F1XH_MPB_MAX_SIZE 2048
@@ -156,7 +155,7 @@
 #define F15H_MPB_MAX_SIZE 4096
 #define F16H_MPB_MAX_SIZE 3458
 
-	switch (c->x86) {
+	switch (family) {
 	case 0x14:
 		max_size = F14H_MPB_MAX_SIZE;
 		break;
@@ -220,12 +219,13 @@
 		return 0;
 	}
 
-	if (__apply_microcode_amd(mc_amd))
+	if (__apply_microcode_amd(mc_amd)) {
 		pr_err("CPU%d: update failed for patch_level=0x%08x\n",
 			cpu, mc_amd->hdr.patch_id);
-	else
-		pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
-			mc_amd->hdr.patch_id);
+		return -1;
+	}
+	pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
+		mc_amd->hdr.patch_id);
 
 	uci->cpu_sig.rev = mc_amd->hdr.patch_id;
 	c->microcode = mc_amd->hdr.patch_id;
@@ -276,9 +276,8 @@
  * driver cannot continue functioning normally. In such cases, we tear
  * down everything we've used up so far and exit.
  */
-static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
+static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover)
 {
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct microcode_header_amd *mc_hdr;
 	struct ucode_patch *patch;
 	unsigned int patch_size, crnt_size, ret;
@@ -298,7 +297,7 @@
 
 	/* check if patch is for the current family */
 	proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
-	if (proc_fam != c->x86)
+	if (proc_fam != family)
 		return crnt_size;
 
 	if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
@@ -307,7 +306,7 @@
 		return crnt_size;
 	}
 
-	ret = verify_patch_size(cpu, patch_size, leftover);
+	ret = verify_patch_size(family, patch_size, leftover);
 	if (!ret) {
 		pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
 		return crnt_size;
@@ -338,7 +337,8 @@
 	return crnt_size;
 }
 
-static enum ucode_state __load_microcode_amd(int cpu, const u8 *data, size_t size)
+static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
+					     size_t size)
 {
 	enum ucode_state ret = UCODE_ERROR;
 	unsigned int leftover;
@@ -361,7 +361,7 @@
 	}
 
 	while (leftover) {
-		crnt_size = verify_and_add_patch(cpu, fw, leftover);
+		crnt_size = verify_and_add_patch(family, fw, leftover);
 		if (crnt_size < 0)
 			return ret;
 
@@ -372,22 +372,22 @@
 	return UCODE_OK;
 }
 
-enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size)
 {
 	enum ucode_state ret;
 
 	/* free old equiv table */
 	free_equiv_cpu_table();
 
-	ret = __load_microcode_amd(cpu, data, size);
+	ret = __load_microcode_amd(family, data, size);
 
 	if (ret != UCODE_OK)
 		cleanup();
 
 #if defined(CONFIG_MICROCODE_AMD_EARLY) && defined(CONFIG_X86_32)
 	/* save BSP's matching patch for early load */
-	if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
-		struct ucode_patch *p = find_patch(cpu);
+	if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) {
+		struct ucode_patch *p = find_patch(smp_processor_id());
 		if (p) {
 			memset(amd_bsp_mpb, 0, MPB_MAX_SIZE);
 			memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data),
@@ -440,7 +440,7 @@
 		goto fw_release;
 	}
 
-	ret = load_microcode_amd(cpu, fw->data, fw->size);
+	ret = load_microcode_amd(c->x86, fw->data, fw->size);
 
  fw_release:
 	release_firmware(fw);
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/microcode_amd_early.c
index 1d14ffe..6073104 100644
--- a/arch/x86/kernel/microcode_amd_early.c
+++ b/arch/x86/kernel/microcode_amd_early.c
@@ -238,25 +238,17 @@
 	uci->cpu_sig.sig = cpuid_eax(0x00000001);
 }
 #else
-static void collect_cpu_info_amd_early(struct cpuinfo_x86 *c,
-						 struct ucode_cpu_info *uci)
+void load_ucode_amd_ap(void)
 {
+	unsigned int cpu = smp_processor_id();
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	u32 rev, eax;
 
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax);
 	eax = cpuid_eax(0x00000001);
 
-	uci->cpu_sig.sig = eax;
 	uci->cpu_sig.rev = rev;
-	c->microcode = rev;
-	c->x86 = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
-}
-
-void load_ucode_amd_ap(void)
-{
-	unsigned int cpu = smp_processor_id();
-
-	collect_cpu_info_amd_early(&cpu_data(cpu), ucode_cpu_info + cpu);
+	uci->cpu_sig.sig = eax;
 
 	if (cpu && !ucode_loaded) {
 		void *ucode;
@@ -265,8 +257,10 @@
 			return;
 
 		ucode = (void *)(initrd_start + ucode_offset);
-		if (load_microcode_amd(0, ucode, ucode_size) != UCODE_OK)
+		eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+		if (load_microcode_amd(eax, ucode, ucode_size) != UCODE_OK)
 			return;
+
 		ucode_loaded = true;
 	}
 
@@ -278,6 +272,8 @@
 {
 	enum ucode_state ret;
 	void *ucode;
+	u32 eax;
+
 #ifdef CONFIG_X86_32
 	unsigned int bsp = boot_cpu_data.cpu_index;
 	struct ucode_cpu_info *uci = ucode_cpu_info + bsp;
@@ -293,7 +289,10 @@
 		return 0;
 
 	ucode = (void *)(initrd_start + ucode_offset);
-	ret = load_microcode_amd(0, ucode, ucode_size);
+	eax   = cpuid_eax(0x00000001);
+	eax   = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff);
+
+	ret = load_microcode_amd(eax, ucode, ucode_size);
 	if (ret != UCODE_OK)
 		return -EINVAL;
 
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index dbded5a..30277e2 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -101,7 +101,7 @@
 				*begin = new_begin;
 		}
 	} else {
-		*begin = TASK_UNMAPPED_BASE;
+		*begin = current->mm->mmap_legacy_base;
 		*end = TASK_SIZE;
 	}
 }
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 62c29a5..25e7e13 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -112,11 +112,13 @@
  */
 void arch_pick_mmap_layout(struct mm_struct *mm)
 {
+	mm->mmap_legacy_base = mmap_legacy_base();
+	mm->mmap_base = mmap_base();
+
 	if (mmap_is_legacy()) {
-		mm->mmap_base = mmap_legacy_base();
+		mm->mmap_base = mm->mmap_legacy_base;
 		mm->get_unmapped_area = arch_get_unmapped_area;
 	} else {
-		mm->mmap_base = mmap_base();
 		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
 	}
 }
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 643b8b5e..8244f5e 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -12,6 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/reboot.h>
 #include <linux/serial_reg.h>
 #include <linux/serial_8250.h>
 #include <linux/reboot.h>
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 056d11f..8f3eea6 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -313,6 +313,17 @@
 	e820_add_region(start, end - start, type);
 }
 
+void xen_ignore_unusable(struct e820entry *list, size_t map_size)
+{
+	struct e820entry *entry;
+	unsigned int i;
+
+	for (i = 0, entry = list; i < map_size; i++, entry++) {
+		if (entry->type == E820_UNUSABLE)
+			entry->type = E820_RAM;
+	}
+}
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
@@ -353,6 +364,17 @@
 	}
 	BUG_ON(rc);
 
+	/*
+	 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
+	 * regions, so if we're using the machine memory map leave the
+	 * region as RAM as it is in the pseudo-physical map.
+	 *
+	 * UNUSABLE regions in domUs are not handled and will need
+	 * a patch in the future.
+	 */
+	if (xen_initial_domain())
+		xen_ignore_unusable(map, memmap.nr_entries);
+
 	/* Make sure the Xen-supplied memory map is well-ordered. */
 	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
 
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index ca92754..b81c88e 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -694,8 +694,15 @@
 static int xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int rc;
-	rc = native_cpu_up(cpu, tidle);
-	WARN_ON (xen_smp_intr_init(cpu));
+	/*
+	 * xen_smp_intr_init() needs to run before native_cpu_up()
+	 * so that IPI vectors are set up on the booting CPU before
+	 * it is marked online in native_cpu_up().
+	*/
+	rc = xen_smp_intr_init(cpu);
+	WARN_ON(rc);
+	if (!rc)
+		rc =  native_cpu_up(cpu, tidle);
 	return rc;
 }
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 69ce573..aca0116 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -376,25 +376,6 @@
 	  which will enable any routine to use the CRC-32-IEEE 802.3 checksum
 	  and gain better performance as compared with the table implementation.
 
-config CRYPTO_CRCT10DIF
-	tristate "CRCT10DIF algorithm"
-	select CRYPTO_HASH
-	help
-	  CRC T10 Data Integrity Field computation is being cast as
-	  a crypto transform.  This allows for faster crc t10 diff
-	  transforms to be used if they are available.
-
-config CRYPTO_CRCT10DIF_PCLMUL
-	tristate "CRCT10DIF PCLMULQDQ hardware acceleration"
-	depends on X86 && 64BIT && CRC_T10DIF
-	select CRYPTO_HASH
-	help
-	  For x86_64 processors with SSE4.2 and PCLMULQDQ supported,
-	  CRC T10 DIF PCLMULQDQ computation can be hardware
-	  accelerated PCLMULQDQ instruction. This option will create
-	  'crct10dif-plcmul' module, which is faster when computing the
-	  crct10dif checksum as compared with the generic table implementation.
-
 config CRYPTO_GHASH
 	tristate "GHASH digest algorithm"
 	select CRYPTO_GF128MUL
diff --git a/crypto/Makefile b/crypto/Makefile
index 2d5ed08..2ba0df2 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -83,7 +83,6 @@
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c
deleted file mode 100644
index 92aca96..0000000
--- a/crypto/crct10dif.c
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform
- *
- * Copyright (c) 2007 Oracle Corporation.  All rights reserved.
- * Written by Martin K. Petersen <martin.petersen@oracle.com>
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-struct chksum_desc_ctx {
-	__u16 crc;
-};
-
-/* Table generated using the following polynomium:
- * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
- * gt: 0x8bb7
- */
-static const __u16 t10_dif_crc_table[256] = {
-	0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-	0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-	0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-	0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-	0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
-	0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
-	0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
-	0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
-	0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
-	0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
-	0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
-	0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
-	0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
-	0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
-	0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
-	0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
-	0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
-	0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
-	0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
-	0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
-	0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
-	0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
-	0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
-	0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
-	0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
-	0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
-	0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
-	0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
-	0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
-	0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
-	0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
-	0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
-};
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
-{
-	unsigned int i;
-
-	for (i = 0 ; i < len ; i++)
-		crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
-
-	return crc;
-}
-EXPORT_SYMBOL(crc_t10dif_generic);
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-			u8 *out)
-{
-	*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize		=	CRC_T10DIF_DIGEST_SIZE,
-	.init		=	chksum_init,
-	.update		=	chksum_update,
-	.final		=	chksum_final,
-	.finup		=	chksum_finup,
-	.digest		=	chksum_digest,
-	.descsize		=	sizeof(struct chksum_desc_ctx),
-	.base			=	{
-		.cra_name		=	"crct10dif",
-		.cra_driver_name	=	"crct10dif-generic",
-		.cra_priority		=	100,
-		.cra_blocksize		=	CRC_T10DIF_BLOCK_SIZE,
-		.cra_module		=	THIS_MODULE,
-	}
-};
-
-static int __init crct10dif_mod_init(void)
-{
-	int ret;
-
-	ret = crypto_register_shash(&alg);
-	return ret;
-}
-
-static void __exit crct10dif_mod_fini(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_mod_init);
-module_exit(crct10dif_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation.");
-MODULE_LICENSE("GPL");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 25a5934..66d254c 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1174,10 +1174,6 @@
 		ret += tcrypt_test("ghash");
 		break;
 
-	case 47:
-		ret += tcrypt_test("crct10dif");
-		break;
-
 	case 100:
 		ret += tcrypt_test("hmac(md5)");
 		break;
@@ -1502,10 +1498,6 @@
 		test_hash_speed("crc32c", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 
-	case 320:
-		test_hash_speed("crct10dif", sec, generic_hash_speed_template);
-		if (mode > 300 && mode < 400) break;
-
 	case 399:
 		break;
 
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 2f00607..ecddf92 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2046,16 +2046,6 @@
 			}
 		}
 	}, {
-		.alg = "crct10dif",
-		.test = alg_test_hash,
-		.fips_allowed = 1,
-		.suite = {
-			.hash = {
-				.vecs = crct10dif_tv_template,
-				.count = CRCT10DIF_TEST_VECTORS
-			}
-		}
-	}, {
 		.alg = "cryptd(__driver-cbc-aes-aesni)",
 		.test = alg_test_null,
 		.fips_allowed = 1,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 7d44aa3..1e701bc 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -450,39 +450,6 @@
 	}
 };
 
-#define CRCT10DIF_TEST_VECTORS	3
-static struct hash_testvec crct10dif_tv_template[] = {
-	{
-		.plaintext = "abc",
-		.psize  = 3,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\x3b\x44",
-#else
-		.digest = "\x44\x3b",
-#endif
-	}, {
-		.plaintext = "1234567890123456789012345678901234567890"
-			     "123456789012345678901234567890123456789",
-		.psize	= 79,
-#ifdef __LITTLE_ENDIAN
-		.digest	= "\x70\x4b",
-#else
-		.digest	= "\x4b\x70",
-#endif
-	}, {
-		.plaintext =
-		"abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
-		.psize  = 56,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\xe3\x9c",
-#else
-		.digest = "\x9c\xe3",
-#endif
-		.np     = 2,
-		.tap    = { 28, 28 }
-	}
-};
-
 /*
  * SHA1 test vectors  from from FIPS PUB 180-1
  * Long vector from CAVS 5.0
diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c
index d21167b..dc34a5b 100644
--- a/drivers/accessibility/braille/braille_console.c
+++ b/drivers/accessibility/braille/braille_console.c
@@ -359,6 +359,9 @@
 		char *console_options, char *braille_options)
 {
 	int ret;
+
+	if (!(console->flags & CON_BRL))
+		return 0;
 	if (!console_options)
 		/* Only support VisioBraille for now */
 		console_options = "57600o8";
@@ -374,15 +377,17 @@
 	braille_co = console;
 	register_keyboard_notifier(&keyboard_notifier_block);
 	register_vt_notifier(&vt_notifier_block);
-	return 0;
+	return 1;
 }
 
 int braille_unregister_console(struct console *console)
 {
 	if (braille_co != console)
 		return -EINVAL;
+	if (!(console->flags & CON_BRL))
+		return 0;
 	unregister_keyboard_notifier(&keyboard_notifier_block);
 	unregister_vt_notifier(&vt_notifier_block);
 	braille_co = NULL;
-	return 0;
+	return 1;
 }
diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c
index fd6c51c..5a74a9c 100644
--- a/drivers/acpi/acpi_processor.c
+++ b/drivers/acpi/acpi_processor.c
@@ -451,7 +451,6 @@
 	/* Clean up. */
 	per_cpu(processor_device_array, pr->id) = NULL;
 	per_cpu(processors, pr->id) = NULL;
-	try_offline_node(cpu_to_node(pr->id));
 
 	/* Remove the CPU. */
 	get_online_cpus();
@@ -459,6 +458,8 @@
 	acpi_unmap_lsapic(pr->id);
 	put_online_cpus();
 
+	try_offline_node(cpu_to_node(pr->id));
+
  out:
 	free_cpumask_var(pr->throttling.shared_cpu_map);
 	kfree(pr);
diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c
index 082b4dd..d405fba 100644
--- a/drivers/acpi/battery.c
+++ b/drivers/acpi/battery.c
@@ -117,6 +117,7 @@
 	struct acpi_device *device;
 	struct notifier_block pm_nb;
 	unsigned long update_time;
+	int revision;
 	int rate_now;
 	int capacity_now;
 	int voltage_now;
@@ -359,6 +360,7 @@
 };
 
 static struct acpi_offsets extended_info_offsets[] = {
+	{offsetof(struct acpi_battery, revision), 0},
 	{offsetof(struct acpi_battery, power_unit), 0},
 	{offsetof(struct acpi_battery, design_capacity), 0},
 	{offsetof(struct acpi_battery, full_charge_capacity), 0},
diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index f680957..408f6b2 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -31,6 +31,7 @@
 static DECLARE_RWSEM(bus_type_sem);
 
 #define PHYSICAL_NODE_STRING "physical_node"
+#define PHYSICAL_NODE_NAME_SIZE (sizeof(PHYSICAL_NODE_STRING) + 10)
 
 int register_acpi_bus_type(struct acpi_bus_type *type)
 {
@@ -78,41 +79,108 @@
 	return ret;
 }
 
-static acpi_status do_acpi_find_child(acpi_handle handle, u32 lvl_not_used,
-				      void *addr_p, void **ret_p)
+static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used,
+				  void *not_used, void **ret_p)
 {
-	unsigned long long addr, sta;
-	acpi_status status;
+	struct acpi_device *adev = NULL;
 
-	status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr);
-	if (ACPI_SUCCESS(status) && addr == *((u64 *)addr_p)) {
+	acpi_bus_get_device(handle, &adev);
+	if (adev) {
 		*ret_p = handle;
-		status = acpi_bus_get_status_handle(handle, &sta);
-		if (ACPI_SUCCESS(status) && (sta & ACPI_STA_DEVICE_ENABLED))
-			return AE_CTRL_TERMINATE;
+		return AE_CTRL_TERMINATE;
 	}
 	return AE_OK;
 }
 
-acpi_handle acpi_get_child(acpi_handle parent, u64 address)
+static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge)
 {
-	void *ret = NULL;
+	unsigned long long sta;
+	acpi_status status;
 
-	if (!parent)
-		return NULL;
+	status = acpi_bus_get_status_handle(handle, &sta);
+	if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
+		return false;
 
-	acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, 1, NULL,
-			    do_acpi_find_child, &address, &ret);
-	return (acpi_handle)ret;
+	if (is_bridge) {
+		void *test = NULL;
+
+		/* Check if this object has at least one child device. */
+		acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				    acpi_dev_present, NULL, NULL, &test);
+		return !!test;
+	}
+	return true;
 }
-EXPORT_SYMBOL(acpi_get_child);
+
+struct find_child_context {
+	u64 addr;
+	bool is_bridge;
+	acpi_handle ret;
+	bool ret_checked;
+};
+
+static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used,
+				 void *data, void **not_used)
+{
+	struct find_child_context *context = data;
+	unsigned long long addr;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr);
+	if (ACPI_FAILURE(status) || addr != context->addr)
+		return AE_OK;
+
+	if (!context->ret) {
+		/* This is the first matching object.  Save its handle. */
+		context->ret = handle;
+		return AE_OK;
+	}
+	/*
+	 * There is more than one matching object with the same _ADR value.
+	 * That really is unexpected, so we are kind of beyond the scope of the
+	 * spec here.  We have to choose which one to return, though.
+	 *
+	 * First, check if the previously found object is good enough and return
+	 * its handle if so.  Second, check the same for the object that we've
+	 * just found.
+	 */
+	if (!context->ret_checked) {
+		if (acpi_extra_checks_passed(context->ret, context->is_bridge))
+			return AE_CTRL_TERMINATE;
+		else
+			context->ret_checked = true;
+	}
+	if (acpi_extra_checks_passed(handle, context->is_bridge)) {
+		context->ret = handle;
+		return AE_CTRL_TERMINATE;
+	}
+	return AE_OK;
+}
+
+acpi_handle acpi_find_child(acpi_handle parent, u64 addr, bool is_bridge)
+{
+	if (parent) {
+		struct find_child_context context = {
+			.addr = addr,
+			.is_bridge = is_bridge,
+		};
+
+		acpi_walk_namespace(ACPI_TYPE_DEVICE, parent, 1, do_find_child,
+				    NULL, &context, NULL);
+		return context.ret;
+	}
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(acpi_find_child);
 
 int acpi_bind_one(struct device *dev, acpi_handle handle)
 {
 	struct acpi_device *acpi_dev;
 	acpi_status status;
 	struct acpi_device_physical_node *physical_node, *pn;
-	char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2];
+	char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
+	struct list_head *physnode_list;
+	unsigned int node_id;
 	int retval = -EINVAL;
 
 	if (ACPI_HANDLE(dev)) {
@@ -139,25 +207,27 @@
 
 	mutex_lock(&acpi_dev->physical_node_lock);
 
-	/* Sanity check. */
-	list_for_each_entry(pn, &acpi_dev->physical_node_list, node)
+	/*
+	 * Keep the list sorted by node_id so that the IDs of removed nodes can
+	 * be recycled easily.
+	 */
+	physnode_list = &acpi_dev->physical_node_list;
+	node_id = 0;
+	list_for_each_entry(pn, &acpi_dev->physical_node_list, node) {
+		/* Sanity check. */
 		if (pn->dev == dev) {
 			dev_warn(dev, "Already associated with ACPI node\n");
 			goto err_free;
 		}
-
-	/* allocate physical node id according to physical_node_id_bitmap */
-	physical_node->node_id =
-		find_first_zero_bit(acpi_dev->physical_node_id_bitmap,
-		ACPI_MAX_PHYSICAL_NODE);
-	if (physical_node->node_id >= ACPI_MAX_PHYSICAL_NODE) {
-		retval = -ENOSPC;
-		goto err_free;
+		if (pn->node_id == node_id) {
+			physnode_list = &pn->node;
+			node_id++;
+		}
 	}
 
-	set_bit(physical_node->node_id, acpi_dev->physical_node_id_bitmap);
+	physical_node->node_id = node_id;
 	physical_node->dev = dev;
-	list_add_tail(&physical_node->node, &acpi_dev->physical_node_list);
+	list_add(&physical_node->node, physnode_list);
 	acpi_dev->physical_node_count++;
 
 	mutex_unlock(&acpi_dev->physical_node_lock);
@@ -208,7 +278,7 @@
 
 	mutex_lock(&acpi_dev->physical_node_lock);
 	list_for_each_safe(node, next, &acpi_dev->physical_node_list) {
-		char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2];
+		char physical_node_name[PHYSICAL_NODE_NAME_SIZE];
 
 		entry = list_entry(node, struct acpi_device_physical_node,
 			node);
@@ -216,7 +286,6 @@
 			continue;
 
 		list_del(node);
-		clear_bit(entry->node_id, acpi_dev->physical_node_id_bitmap);
 
 		acpi_dev->physical_node_count--;
 
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 227aca7..5da44e8 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -169,10 +169,8 @@
   -------------------------------------------------------------------------- */
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 bool acpi_video_backlight_quirks(void);
-bool acpi_video_verify_backlight_support(void);
 #else
 static inline bool acpi_video_backlight_quirks(void) { return false; }
-static inline bool acpi_video_verify_backlight_support(void) { return false; }
 #endif
 
 #endif /* _ACPI_INTERNAL_H_ */
diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c
index aa1227a..04a1378 100644
--- a/drivers/acpi/proc.c
+++ b/drivers/acpi/proc.c
@@ -311,6 +311,8 @@
 			   dev->pnp.bus_id,
 			   (u32) dev->wakeup.sleep_state);
 
+		mutex_lock(&dev->physical_node_lock);
+
 		if (!dev->physical_node_count) {
 			seq_printf(seq, "%c%-8s\n",
 				dev->wakeup.flags.run_wake ? '*' : ' ',
@@ -338,6 +340,8 @@
 				put_device(ldev);
 			}
 		}
+
+		mutex_unlock(&dev->physical_node_lock);
 	}
 	mutex_unlock(&acpi_device_lock);
 	return 0;
@@ -347,12 +351,16 @@
 {
 	struct acpi_device_physical_node *entry;
 
+	mutex_lock(&adev->physical_node_lock);
+
 	list_for_each_entry(entry,
 		&adev->physical_node_list, node)
 		if (entry->dev && device_can_wakeup(entry->dev)) {
 			bool enable = !device_may_wakeup(entry->dev);
 			device_set_wakeup_enable(entry->dev, enable);
 		}
+
+	mutex_unlock(&adev->physical_node_lock);
 }
 
 static ssize_t
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 6dd237e..3270d3c 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -689,7 +689,7 @@
 	 * Some systems always report current brightness level as maximum
 	 * through _BQC, we need to test another value for them.
 	 */
-	test_level = current_level == max_level ? br->levels[2] : max_level;
+	test_level = current_level == max_level ? br->levels[3] : max_level;
 
 	result = acpi_video_device_lcd_set_level(device, test_level);
 	if (result)
@@ -908,10 +908,7 @@
 		device->cap._DDC = 1;
 	}
 
-	if (acpi_video_init_brightness(device))
-		return;
-
-	if (acpi_video_verify_backlight_support()) {
+	if (acpi_video_backlight_support()) {
 		struct backlight_properties props;
 		struct pci_dev *pdev;
 		acpi_handle acpi_parent;
@@ -920,6 +917,9 @@
 		static int count = 0;
 		char *name;
 
+		result = acpi_video_init_brightness(device);
+		if (result)
+			return;
 		name = kasprintf(GFP_KERNEL, "acpi_video%d", count);
 		if (!name)
 			return;
@@ -979,11 +979,6 @@
 		if (result)
 			printk(KERN_ERR PREFIX "Create sysfs link\n");
 
-	} else {
-		/* Remove the brightness object. */
-		kfree(device->brightness->levels);
-		kfree(device->brightness);
-		device->brightness = NULL;
 	}
 }
 
@@ -1366,8 +1361,8 @@
 	unsigned long long level_current, level_next;
 	int result = -EINVAL;
 
-	/* no warning message if acpi_backlight=vendor or a quirk is used */
-	if (!acpi_video_verify_backlight_support())
+	/* no warning message if acpi_backlight=vendor is used */
+	if (!acpi_video_backlight_support())
 		return 0;
 
 	if (!device->brightness)
@@ -1875,46 +1870,6 @@
 	return 0;
 }
 
-static acpi_status video_unregister_backlight(acpi_handle handle, u32 lvl,
-					      void *context, void **rv)
-{
-	struct acpi_device *acpi_dev;
-	struct acpi_video_bus *video;
-	struct acpi_video_device *dev, *next;
-
-	if (acpi_bus_get_device(handle, &acpi_dev))
-		return AE_OK;
-
-	if (acpi_match_device_ids(acpi_dev, video_device_ids))
-		return AE_OK;
-
-	video = acpi_driver_data(acpi_dev);
-	if (!video)
-		return AE_OK;
-
-	acpi_video_bus_stop_devices(video);
-	mutex_lock(&video->device_list_lock);
-	list_for_each_entry_safe(dev, next, &video->video_device_list, entry) {
-		if (dev->backlight) {
-			backlight_device_unregister(dev->backlight);
-			dev->backlight = NULL;
-			kfree(dev->brightness->levels);
-			kfree(dev->brightness);
-		}
-		if (dev->cooling_dev) {
-			sysfs_remove_link(&dev->dev->dev.kobj,
-					  "thermal_cooling");
-			sysfs_remove_link(&dev->cooling_dev->device.kobj,
-					  "device");
-			thermal_cooling_device_unregister(dev->cooling_dev);
-			dev->cooling_dev = NULL;
-		}
-	}
-	mutex_unlock(&video->device_list_lock);
-	acpi_video_bus_start_devices(video);
-	return AE_OK;
-}
-
 static int __init is_i740(struct pci_dev *dev)
 {
 	if (dev->device == 0x00D1)
@@ -1946,25 +1901,14 @@
 	return opregion;
 }
 
-int __acpi_video_register(bool backlight_quirks)
+int acpi_video_register(void)
 {
-	bool no_backlight;
-	int result;
-
-	no_backlight = backlight_quirks ? acpi_video_backlight_quirks() : false;
-
+	int result = 0;
 	if (register_count) {
 		/*
-		 * If acpi_video_register() has been called already, don't try
-		 * to register acpi_video_bus, but unregister backlight devices
-		 * if no backlight support is requested.
+		 * if the function of acpi_video_register is already called,
+		 * don't register the acpi_vide_bus again and return no error.
 		 */
-		if (no_backlight)
-			acpi_walk_namespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT,
-					    ACPI_UINT32_MAX,
-					    video_unregister_backlight,
-					    NULL, NULL, NULL);
-
 		return 0;
 	}
 
@@ -1980,7 +1924,7 @@
 
 	return 0;
 }
-EXPORT_SYMBOL(__acpi_video_register);
+EXPORT_SYMBOL(acpi_video_register);
 
 void acpi_video_unregister(void)
 {
diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 826e52d..c339774 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -238,12 +238,7 @@
 
 bool acpi_video_backlight_quirks(void)
 {
-	if (acpi_gbl_osi_data >= ACPI_OSI_WIN_8) {
-		acpi_video_caps_check();
-		acpi_video_support |= ACPI_VIDEO_SKIP_BACKLIGHT;
-		return true;
-	}
-	return false;
+	return acpi_gbl_osi_data >= ACPI_OSI_WIN_8;
 }
 EXPORT_SYMBOL(acpi_video_backlight_quirks);
 
@@ -291,14 +286,6 @@
 }
 EXPORT_SYMBOL(acpi_video_backlight_support);
 
-/* For the ACPI video driver use only. */
-bool acpi_video_verify_backlight_support(void)
-{
-	return (acpi_video_support & ACPI_VIDEO_SKIP_BACKLIGHT) ?
-		false : acpi_video_backlight_support();
-}
-EXPORT_SYMBOL(acpi_video_verify_backlight_support);
-
 /*
  * Use acpi_backlight=vendor/video to force that backlight switching
  * is processed by vendor specific acpi drivers or video.ko driver.
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 80dc988..4e73772 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -97,6 +97,15 @@
 
 	  If unsure, say N.
 
+config AHCI_IMX
+	tristate "Freescale i.MX AHCI SATA support"
+	depends on SATA_AHCI_PLATFORM && MFD_SYSCON
+	help
+	  This option enables support for the Freescale i.MX SoC's
+	  onboard AHCI SATA.
+
+	  If unsure, say N.
+
 config SATA_FSL
 	tristate "Freescale 3.0Gbps SATA support"
 	depends on FSL_SOC
@@ -107,7 +116,7 @@
 	  If unsure, say N.
 
 config SATA_INIC162X
-	tristate "Initio 162x SATA support"
+	tristate "Initio 162x SATA support (Very Experimental)"
 	depends on PCI
 	help
 	  This option enables support for Initio 162x Serial ATA.
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index c04d0fd..46518c6 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -10,6 +10,7 @@
 obj-$(CONFIG_SATA_SIL24)	+= sata_sil24.o
 obj-$(CONFIG_SATA_DWC)		+= sata_dwc_460ex.o
 obj-$(CONFIG_SATA_HIGHBANK)	+= sata_highbank.o libahci.o
+obj-$(CONFIG_AHCI_IMX)		+= ahci_imx.o
 
 # SFF w/ custom DMA
 obj-$(CONFIG_PDC_ADMA)		+= pdc_adma.o
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5064f3e..db4380d 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1146,11 +1146,18 @@
 		return rc;
 
 	for (i = 0; i < host->n_ports; i++) {
+		const char* desc;
 		struct ahci_port_priv *pp = host->ports[i]->private_data;
 
+		/* pp is NULL for dummy ports */
+		if (pp)
+			desc = pp->irq_desc;
+		else
+			desc = dev_driver_string(host->dev);
+
 		rc = devm_request_threaded_irq(host->dev,
 			irq + i, ahci_hw_interrupt, ahci_thread_fn, IRQF_SHARED,
-			pp->irq_desc, host->ports[i]);
+			desc, host->ports[i]);
 		if (rc)
 			goto out_free_irqs;
 	}
diff --git a/drivers/ata/ahci_imx.c b/drivers/ata/ahci_imx.c
new file mode 100644
index 0000000..58debb0
--- /dev/null
+++ b/drivers/ata/ahci_imx.c
@@ -0,0 +1,236 @@
+/*
+ * Freescale IMX AHCI SATA platform driver
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/ahci_platform.h>
+#include <linux/of_device.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
+#include "ahci.h"
+
+enum {
+	HOST_TIMER1MS = 0xe0, /* Timer 1-ms */
+};
+
+struct imx_ahci_priv {
+	struct platform_device *ahci_pdev;
+	struct clk *sata_ref_clk;
+	struct clk *ahb_clk;
+	struct regmap *gpr;
+};
+
+static int imx6q_sata_init(struct device *dev, void __iomem *mmio)
+{
+	int ret = 0;
+	unsigned int reg_val;
+	struct imx_ahci_priv *imxpriv = dev_get_drvdata(dev->parent);
+
+	imxpriv->gpr =
+		syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
+	if (IS_ERR(imxpriv->gpr)) {
+		dev_err(dev, "failed to find fsl,imx6q-iomux-gpr regmap\n");
+		return PTR_ERR(imxpriv->gpr);
+	}
+
+	ret = clk_prepare_enable(imxpriv->sata_ref_clk);
+	if (ret < 0) {
+		dev_err(dev, "prepare-enable sata_ref clock err:%d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * set PHY Paremeters, two steps to configure the GPR13,
+	 * one write for rest of parameters, mask of first write
+	 * is 0x07fffffd, and the other one write for setting
+	 * the mpll_clk_en.
+	 */
+	regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK
+			| IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK
+			| IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK
+			| IMX6Q_GPR13_SATA_SPD_MODE_MASK
+			| IMX6Q_GPR13_SATA_MPLL_SS_EN
+			| IMX6Q_GPR13_SATA_TX_ATTEN_MASK
+			| IMX6Q_GPR13_SATA_TX_BOOST_MASK
+			| IMX6Q_GPR13_SATA_TX_LVL_MASK
+			| IMX6Q_GPR13_SATA_TX_EDGE_RATE
+			, IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB
+			| IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M
+			| IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F
+			| IMX6Q_GPR13_SATA_SPD_MODE_3P0G
+			| IMX6Q_GPR13_SATA_MPLL_SS_EN
+			| IMX6Q_GPR13_SATA_TX_ATTEN_9_16
+			| IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB
+			| IMX6Q_GPR13_SATA_TX_LVL_1_025_V);
+	regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+			IMX6Q_GPR13_SATA_MPLL_CLK_EN);
+	usleep_range(100, 200);
+
+	/*
+	 * Configure the HWINIT bits of the HOST_CAP and HOST_PORTS_IMPL,
+	 * and IP vendor specific register HOST_TIMER1MS.
+	 * Configure CAP_SSS (support stagered spin up).
+	 * Implement the port0.
+	 * Get the ahb clock rate, and configure the TIMER1MS register.
+	 */
+	reg_val = readl(mmio + HOST_CAP);
+	if (!(reg_val & HOST_CAP_SSS)) {
+		reg_val |= HOST_CAP_SSS;
+		writel(reg_val, mmio + HOST_CAP);
+	}
+	reg_val = readl(mmio + HOST_PORTS_IMPL);
+	if (!(reg_val & 0x1)) {
+		reg_val |= 0x1;
+		writel(reg_val, mmio + HOST_PORTS_IMPL);
+	}
+
+	reg_val = clk_get_rate(imxpriv->ahb_clk) / 1000;
+	writel(reg_val, mmio + HOST_TIMER1MS);
+
+	return 0;
+}
+
+static void imx6q_sata_exit(struct device *dev)
+{
+	struct imx_ahci_priv *imxpriv =  dev_get_drvdata(dev->parent);
+
+	regmap_update_bits(imxpriv->gpr, 0x34, IMX6Q_GPR13_SATA_MPLL_CLK_EN,
+			!IMX6Q_GPR13_SATA_MPLL_CLK_EN);
+	clk_disable_unprepare(imxpriv->sata_ref_clk);
+}
+
+static struct ahci_platform_data imx6q_sata_pdata = {
+	.init = imx6q_sata_init,
+	.exit = imx6q_sata_exit,
+};
+
+static const struct of_device_id imx_ahci_of_match[] = {
+	{ .compatible = "fsl,imx6q-ahci", .data = &imx6q_sata_pdata},
+	{},
+};
+MODULE_DEVICE_TABLE(of, imx_ahci_of_match);
+
+static int imx_ahci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct resource *mem, *irq, res[2];
+	const struct of_device_id *of_id;
+	const struct ahci_platform_data *pdata = NULL;
+	struct imx_ahci_priv *imxpriv;
+	struct device *ahci_dev;
+	struct platform_device *ahci_pdev;
+	int ret;
+
+	imxpriv = devm_kzalloc(dev, sizeof(*imxpriv), GFP_KERNEL);
+	if (!imxpriv) {
+		dev_err(dev, "can't alloc ahci_host_priv\n");
+		return -ENOMEM;
+	}
+
+	ahci_pdev = platform_device_alloc("ahci", -1);
+	if (!ahci_pdev)
+		return -ENODEV;
+
+	ahci_dev = &ahci_pdev->dev;
+	ahci_dev->parent = dev;
+
+	imxpriv->ahb_clk = devm_clk_get(dev, "ahb");
+	if (IS_ERR(imxpriv->ahb_clk)) {
+		dev_err(dev, "can't get ahb clock.\n");
+		ret = PTR_ERR(imxpriv->ahb_clk);
+		goto err_out;
+	}
+
+	imxpriv->sata_ref_clk = devm_clk_get(dev, "sata_ref");
+	if (IS_ERR(imxpriv->sata_ref_clk)) {
+		dev_err(dev, "can't get sata_ref clock.\n");
+		ret = PTR_ERR(imxpriv->sata_ref_clk);
+		goto err_out;
+	}
+
+	imxpriv->ahci_pdev = ahci_pdev;
+	platform_set_drvdata(pdev, imxpriv);
+
+	of_id = of_match_device(imx_ahci_of_match, dev);
+	if (of_id) {
+		pdata = of_id->data;
+	} else {
+		ret = -EINVAL;
+		goto err_out;
+	}
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!mem || !irq) {
+		dev_err(dev, "no mmio/irq resource\n");
+		ret = -ENOMEM;
+		goto err_out;
+	}
+
+	res[0] = *mem;
+	res[1] = *irq;
+
+	ahci_dev->coherent_dma_mask = DMA_BIT_MASK(32);
+	ahci_dev->dma_mask = &ahci_dev->coherent_dma_mask;
+	ahci_dev->of_node = dev->of_node;
+
+	ret = platform_device_add_resources(ahci_pdev, res, 2);
+	if (ret)
+		goto err_out;
+
+	ret = platform_device_add_data(ahci_pdev, pdata, sizeof(*pdata));
+	if (ret)
+		goto err_out;
+
+	ret = platform_device_add(ahci_pdev);
+	if (ret) {
+err_out:
+		platform_device_put(ahci_pdev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int imx_ahci_remove(struct platform_device *pdev)
+{
+	struct imx_ahci_priv *imxpriv = platform_get_drvdata(pdev);
+	struct platform_device *ahci_pdev = imxpriv->ahci_pdev;
+
+	platform_device_unregister(ahci_pdev);
+	return 0;
+}
+
+static struct platform_driver imx_ahci_driver = {
+	.probe = imx_ahci_probe,
+	.remove = imx_ahci_remove,
+	.driver = {
+		.name = "ahci-imx",
+		.owner = THIS_MODULE,
+		.of_match_table = imx_ahci_of_match,
+	},
+};
+module_platform_driver(imx_ahci_driver);
+
+MODULE_DESCRIPTION("Freescale i.MX AHCI SATA platform driver");
+MODULE_AUTHOR("Richard Zhu <Hong-Xing.Zhu@freescale.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ahci:imx");
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
index b52a10c..513ad7e 100644
--- a/drivers/ata/ata_piix.c
+++ b/drivers/ata/ata_piix.c
@@ -330,7 +330,7 @@
 	/* SATA Controller IDE (Wellsburg) */
 	{ 0x8086, 0x8d00, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb },
 	/* SATA Controller IDE (Wellsburg) */
-	{ 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata },
+	{ 0x8086, 0x8d08, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata_snb },
 	/* SATA Controller IDE (Wellsburg) */
 	{ 0x8086, 0x8d60, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_snb },
 	/* SATA Controller IDE (Wellsburg) */
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
index 1c41722..20fd337 100644
--- a/drivers/ata/libata-pmp.c
+++ b/drivers/ata/libata-pmp.c
@@ -289,24 +289,24 @@
 
 	/* Disable sending Early R_OK.
 	 * With "cached read" HDD testing and multiple ports busy on a SATA
-	 * host controller, 3726 PMP will very rarely drop a deferred
+	 * host controller, 3x26 PMP will very rarely drop a deferred
 	 * R_OK that was intended for the host. Symptom will be all
 	 * 5 drives under test will timeout, get reset, and recover.
 	 */
-	if (vendor == 0x1095 && devid == 0x3726) {
+	if (vendor == 0x1095 && (devid == 0x3726 || devid == 0x3826)) {
 		u32 reg;
 
 		err_mask = sata_pmp_read(&ap->link, PMP_GSCR_SII_POL, &reg);
 		if (err_mask) {
 			rc = -EIO;
-			reason = "failed to read Sil3726 Private Register";
+			reason = "failed to read Sil3x26 Private Register";
 			goto fail;
 		}
 		reg &= ~0x1;
 		err_mask = sata_pmp_write(&ap->link, PMP_GSCR_SII_POL, reg);
 		if (err_mask) {
 			rc = -EIO;
-			reason = "failed to write Sil3726 Private Register";
+			reason = "failed to write Sil3x26 Private Register";
 			goto fail;
 		}
 	}
@@ -383,8 +383,8 @@
 	u16 devid = sata_pmp_gscr_devid(gscr);
 	struct ata_link *link;
 
-	if (vendor == 0x1095 && devid == 0x3726) {
-		/* sil3726 quirks */
+	if (vendor == 0x1095 && (devid == 0x3726 || devid == 0x3826)) {
+		/* sil3x26 quirks */
 		ata_for_each_link(link, ap, EDGE) {
 			/* link reports offline after LPM */
 			link->flags |= ATA_LFLAG_NO_LPM;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 83c0890..b1e880a 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -206,8 +206,10 @@
 	unsigned long flags;
 	int rc;
 
-	rc = strict_strtol(buf, 10, &input);
-	if (rc || input < -2)
+	rc = kstrtol(buf, 10, &input);
+	if (rc)
+		return rc;
+	if (input < -2)
 		return -EINVAL;
 	if (input > ATA_TMOUT_MAX_PARK) {
 		rc = -EOVERFLOW;
diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c
index 4ec7c04..26386f0 100644
--- a/drivers/ata/pata_imx.c
+++ b/drivers/ata/pata_imx.c
@@ -237,6 +237,7 @@
 		/* sentinel */
 	}
 };
+MODULE_DEVICE_TABLE(of, imx_pata_dt_ids);
 
 static struct platform_driver pata_imx_driver = {
 	.probe		= pata_imx_probe,
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 19720a0..851bd3f 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -293,6 +293,7 @@
 {
 	struct sata_fsl_host_priv *host_priv = host->private_data;
 	void __iomem *hcr_base = host_priv->hcr_base;
+	unsigned long flags;
 
 	if (count > ICC_MAX_INT_COUNT_THRESHOLD)
 		count = ICC_MAX_INT_COUNT_THRESHOLD;
@@ -305,12 +306,12 @@
 			(count > ICC_MIN_INT_COUNT_THRESHOLD))
 		ticks = ICC_SAFE_INT_TICKS;
 
-	spin_lock(&host->lock);
+	spin_lock_irqsave(&host->lock, flags);
 	iowrite32((count << 24 | ticks), hcr_base + ICC);
 
 	intr_coalescing_count = count;
 	intr_coalescing_ticks = ticks;
-	spin_unlock(&host->lock);
+	spin_unlock_irqrestore(&host->lock, flags);
 
 	DPRINTK("interrupt coalescing, count = 0x%x, ticks = %x\n",
 			intr_coalescing_count, intr_coalescing_ticks);
diff --git a/drivers/ata/sata_highbank.c b/drivers/ata/sata_highbank.c
index d047d92..e9a4f46 100644
--- a/drivers/ata/sata_highbank.c
+++ b/drivers/ata/sata_highbank.c
@@ -86,11 +86,11 @@
 
 #define SGPIO_SIGNALS			3
 #define ECX_ACTIVITY_BITS		0x300000
-#define ECX_ACTIVITY_SHIFT		2
+#define ECX_ACTIVITY_SHIFT		0
 #define ECX_LOCATE_BITS			0x80000
 #define ECX_LOCATE_SHIFT		1
 #define ECX_FAULT_BITS			0x400000
-#define ECX_FAULT_SHIFT			0
+#define ECX_FAULT_SHIFT			2
 static inline int sgpio_bit_shift(struct ecx_plat_data *pdata, u32 port,
 				u32 shift)
 {
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index e451317..5c54d95 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -6,6 +6,18 @@
  *
  * This file is released under GPL v2.
  *
+ * **** WARNING ****
+ *
+ * This driver never worked properly and unfortunately data corruption is
+ * relatively common.  There isn't anyone working on the driver and there's
+ * no support from the vendor.  Do not use this driver in any production
+ * environment.
+ *
+ * http://thread.gmane.org/gmane.linux.debian.devel.bugs.rc/378525/focus=54491
+ * https://bugzilla.kernel.org/show_bug.cgi?id=60565
+ *
+ * *****************
+ *
  * This controller is eccentric and easily locks up if something isn't
  * right.  Documentation is available at initio's website but it only
  * documents registers (not programming model).
@@ -807,6 +819,8 @@
 
 	ata_print_version_once(&pdev->dev, DRV_VERSION);
 
+	dev_alert(&pdev->dev, "inic162x support is broken with common data corruption issues and will be disabled by default, contact linux-ide@vger.kernel.org if in production use\n");
+
 	/* alloc host */
 	host = ata_host_alloc_pinfo(&pdev->dev, ppi, NR_PORTS);
 	hpriv = devm_kzalloc(&pdev->dev, sizeof(*hpriv), GFP_KERNEL);
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index e691026..3455f83 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -719,7 +719,8 @@
 		}
 	}
 
-	return regcache_sync_block_raw_flush(map, &data, base, regtmp);
+	return regcache_sync_block_raw_flush(map, &data, base, regtmp +
+			map->reg_stride);
 }
 
 int regcache_sync_block(struct regmap *map, void *block,
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index b81ddfe..e07a5fd 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -532,11 +532,11 @@
 	  If unsure, say N.
 
 config BLK_DEV_RSXX
-	tristate "IBM FlashSystem 70/80 PCIe SSD Device Driver"
+	tristate "IBM Flash Adapter 900GB Full Height PCIe Device Driver"
 	depends on PCI
 	help
 	  Device driver for IBM's high speed PCIe SSD
-	  storage devices: FlashSystem-70 and FlashSystem-80.
+	  storage device: Flash Adapter 900GB Full Height.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called rsxx.
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index 99cb944..4d45dba 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -906,16 +906,10 @@
 	int i;
 
 	bio_for_each_segment(bv, bio, i) {
-		page = bv->bv_page;
 		/* Non-zero page count for non-head members of
-		 * compound pages is no longer allowed by the kernel,
-		 * but this has never been seen here.
+		 * compound pages is no longer allowed by the kernel.
 		 */
-		if (unlikely(PageCompound(page)))
-			if (compound_trans_head(page) != page) {
-				pr_crit("page tail used for block I/O\n");
-				BUG();
-			}
+		page = compound_trans_head(bv->bv_page);
 		atomic_inc(&page->_count);
 	}
 }
@@ -924,10 +918,13 @@
 bio_pagedec(struct bio *bio)
 {
 	struct bio_vec *bv;
+	struct page *page;
 	int i;
 
-	bio_for_each_segment(bv, bio, i)
-		atomic_dec(&bv->bv_page->_count);
+	bio_for_each_segment(bv, bio, i) {
+		page = compound_trans_head(bv->bv_page);
+		atomic_dec(&page->_count);
+	}
 }
 
 static void
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 6608076..28c73ca 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -659,6 +659,27 @@
 	wake_up(&mdev->al_wait);
 }
 
+int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
+{
+	struct al_transaction_on_disk *al = buffer;
+	struct drbd_md *md = &mdev->ldev->md;
+	sector_t al_base = md->md_offset + md->al_offset;
+	int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
+	int i;
+
+	memset(al, 0, 4096);
+	al->magic = cpu_to_be32(DRBD_AL_MAGIC);
+	al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
+	al->crc32c = cpu_to_be32(crc32c(0, al, 4096));
+
+	for (i = 0; i < al_size_4k; i++) {
+		int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 static int w_update_odbm(struct drbd_work *w, int unused)
 {
 	struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index f943aac..2d7f608 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -832,6 +832,7 @@
 	unsigned susp_nod:1;		/* IO suspended because no data */
 	unsigned susp_fen:1;		/* IO suspended because fence peer handler runs */
 	struct mutex cstate_mutex;	/* Protects graceful disconnects */
+	unsigned int connect_cnt;	/* Inc each time a connection is established */
 
 	unsigned long flags;
 	struct net_conf *net_conf;	/* content protected by rcu */
@@ -1132,6 +1133,7 @@
 void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
 
 extern void conn_md_sync(struct drbd_tconn *tconn);
+extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
 extern void drbd_md_sync(struct drbd_conf *mdev);
 extern int  drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
 extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
@@ -1466,8 +1468,16 @@
 extern void drbd_resume_io(struct drbd_conf *mdev);
 extern char *ppsize(char *buf, unsigned long long size);
 extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
-enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 };
-extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
+enum determine_dev_size {
+	DS_ERROR_SHRINK = -3,
+	DS_ERROR_SPACE_MD = -2,
+	DS_ERROR = -1,
+	DS_UNCHANGED = 0,
+	DS_SHRUNK = 1,
+	DS_GREW = 2
+};
+extern enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
 extern void resync_after_online_grow(struct drbd_conf *);
 extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
 extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
@@ -1633,6 +1643,7 @@
 #define drbd_set_out_of_sync(mdev, sector, size) \
 	__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
 extern void drbd_al_shrink(struct drbd_conf *mdev);
+extern int drbd_initialize_al(struct drbd_conf *, void *);
 
 /* drbd_nl.c */
 /* state info broadcast */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a5dca6a..55635ed 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2762,8 +2762,6 @@
 	/*
 	 * allocate all necessary structs
 	 */
-	err = -ENOMEM;
-
 	init_waitqueue_head(&drbd_pp_wait);
 
 	drbd_proc = NULL; /* play safe for drbd_cleanup */
@@ -2773,6 +2771,7 @@
 	if (err)
 		goto fail;
 
+	err = -ENOMEM;
 	drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
 	if (!drbd_proc)	{
 		printk(KERN_ERR "drbd: unable to register proc file\n");
@@ -2803,7 +2802,6 @@
 fail:
 	drbd_cleanup();
 	if (err == -ENOMEM)
-		/* currently always the case */
 		printk(KERN_ERR "drbd: ran out of memory\n");
 	else
 		printk(KERN_ERR "drbd: initialization failure\n");
@@ -2881,34 +2879,14 @@
 	u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
-/**
- * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
- * @mdev:	DRBD device.
- */
-void drbd_md_sync(struct drbd_conf *mdev)
+
+
+void drbd_md_write(struct drbd_conf *mdev, void *b)
 {
-	struct meta_data_on_disk *buffer;
+	struct meta_data_on_disk *buffer = b;
 	sector_t sector;
 	int i;
 
-	/* Don't accidentally change the DRBD meta data layout. */
-	BUILD_BUG_ON(UI_SIZE != 4);
-	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
-
-	del_timer(&mdev->md_sync_timer);
-	/* timer may be rearmed by drbd_md_mark_dirty() now. */
-	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
-		return;
-
-	/* We use here D_FAILED and not D_ATTACHING because we try to write
-	 * metadata even if we detach due to a disk failure! */
-	if (!get_ldev_if_state(mdev, D_FAILED))
-		return;
-
-	buffer = drbd_md_get_buffer(mdev);
-	if (!buffer)
-		goto out;
-
 	memset(buffer, 0, sizeof(*buffer));
 
 	buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
@@ -2937,6 +2915,35 @@
 		dev_err(DEV, "meta data update failed!\n");
 		drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
 	}
+}
+
+/**
+ * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
+ * @mdev:	DRBD device.
+ */
+void drbd_md_sync(struct drbd_conf *mdev)
+{
+	struct meta_data_on_disk *buffer;
+
+	/* Don't accidentally change the DRBD meta data layout. */
+	BUILD_BUG_ON(UI_SIZE != 4);
+	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
+
+	del_timer(&mdev->md_sync_timer);
+	/* timer may be rearmed by drbd_md_mark_dirty() now. */
+	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
+		return;
+
+	/* We use here D_FAILED and not D_ATTACHING because we try to write
+	 * metadata even if we detach due to a disk failure! */
+	if (!get_ldev_if_state(mdev, D_FAILED))
+		return;
+
+	buffer = drbd_md_get_buffer(mdev);
+	if (!buffer)
+		goto out;
+
+	drbd_md_write(mdev, buffer);
 
 	/* Update mdev->ldev->md.la_size_sect,
 	 * since we updated it on metadata. */
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9e3f441..8cc1e64 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -417,6 +417,7 @@
 
 bool conn_try_outdate_peer(struct drbd_tconn *tconn)
 {
+	unsigned int connect_cnt;
 	union drbd_state mask = { };
 	union drbd_state val = { };
 	enum drbd_fencing_p fp;
@@ -428,6 +429,10 @@
 		return false;
 	}
 
+	spin_lock_irq(&tconn->req_lock);
+	connect_cnt = tconn->connect_cnt;
+	spin_unlock_irq(&tconn->req_lock);
+
 	fp = highest_fencing_policy(tconn);
 	switch (fp) {
 	case FP_NOT_AVAIL:
@@ -492,8 +497,14 @@
 	   here, because we might were able to re-establish the connection in the
 	   meantime. */
 	spin_lock_irq(&tconn->req_lock);
-	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags))
-		_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	if (tconn->cstate < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &tconn->flags)) {
+		if (tconn->connect_cnt != connect_cnt)
+			/* In case the connection was established and droped
+			   while the fence-peer handler was running, ignore it */
+			conn_info(tconn, "Ignoring fence-peer exit code\n");
+		else
+			_conn_request_state(tconn, mask, val, CS_VERBOSE);
+	}
 	spin_unlock_irq(&tconn->req_lock);
 
 	return conn_highest_pdsk(tconn) <= D_OUTDATED;
@@ -816,15 +827,20 @@
  * Returns 0 on success, negative return values indicate errors.
  * You should call drbd_md_sync() after calling this function.
  */
-enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
+enum determine_dev_size
+drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
 {
 	sector_t prev_first_sect, prev_size; /* previous meta location */
 	sector_t la_size_sect, u_size;
+	struct drbd_md *md = &mdev->ldev->md;
+	u32 prev_al_stripe_size_4k;
+	u32 prev_al_stripes;
 	sector_t size;
 	char ppb[10];
+	void *buffer;
 
 	int md_moved, la_size_changed;
-	enum determine_dev_size rv = unchanged;
+	enum determine_dev_size rv = DS_UNCHANGED;
 
 	/* race:
 	 * application request passes inc_ap_bio,
@@ -836,6 +852,11 @@
 	 * still lock the act_log to not trigger ASSERTs there.
 	 */
 	drbd_suspend_io(mdev);
+	buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
+	if (!buffer) {
+		drbd_resume_io(mdev);
+		return DS_ERROR;
+	}
 
 	/* no wait necessary anymore, actually we could assert that */
 	wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
@@ -844,7 +865,17 @@
 	prev_size = mdev->ldev->md.md_size_sect;
 	la_size_sect = mdev->ldev->md.la_size_sect;
 
-	/* TODO: should only be some assert here, not (re)init... */
+	if (rs) {
+		/* rs is non NULL if we should change the AL layout only */
+
+		prev_al_stripes = md->al_stripes;
+		prev_al_stripe_size_4k = md->al_stripe_size_4k;
+
+		md->al_stripes = rs->al_stripes;
+		md->al_stripe_size_4k = rs->al_stripe_size / 4;
+		md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
+	}
+
 	drbd_md_set_sector_offsets(mdev, mdev->ldev);
 
 	rcu_read_lock();
@@ -852,6 +883,21 @@
 	rcu_read_unlock();
 	size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);
 
+	if (size < la_size_sect) {
+		if (rs && u_size == 0) {
+			/* Remove "rs &&" later. This check should always be active, but
+			   right now the receiver expects the permissive behavior */
+			dev_warn(DEV, "Implicit shrink not allowed. "
+				 "Use --size=%llus for explicit shrink.\n",
+				 (unsigned long long)size);
+			rv = DS_ERROR_SHRINK;
+		}
+		if (u_size > size)
+			rv = DS_ERROR_SPACE_MD;
+		if (rv != DS_UNCHANGED)
+			goto err_out;
+	}
+
 	if (drbd_get_capacity(mdev->this_bdev) != size ||
 	    drbd_bm_capacity(mdev) != size) {
 		int err;
@@ -867,7 +913,7 @@
 				    "Leaving size unchanged at size = %lu KB\n",
 				    (unsigned long)size);
 			}
-			rv = dev_size_error;
+			rv = DS_ERROR;
 		}
 		/* racy, see comments above. */
 		drbd_set_my_capacity(mdev, size);
@@ -875,38 +921,57 @@
 		dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
 		     (unsigned long long)size>>1);
 	}
-	if (rv == dev_size_error)
-		goto out;
+	if (rv <= DS_ERROR)
+		goto err_out;
 
 	la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);
 
 	md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
 		|| prev_size	   != mdev->ldev->md.md_size_sect;
 
-	if (la_size_changed || md_moved) {
-		int err;
+	if (la_size_changed || md_moved || rs) {
+		u32 prev_flags;
 
 		drbd_al_shrink(mdev); /* All extents inactive. */
+
+		prev_flags = md->flags;
+		md->flags &= ~MDF_PRIMARY_IND;
+		drbd_md_write(mdev, buffer);
+
 		dev_info(DEV, "Writing the whole bitmap, %s\n",
 			 la_size_changed && md_moved ? "size changed and md moved" :
 			 la_size_changed ? "size changed" : "md moved");
 		/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
-		err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
-				     "size changed", BM_LOCKED_MASK);
-		if (err) {
-			rv = dev_size_error;
-			goto out;
-		}
-		drbd_md_mark_dirty(mdev);
+		drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
+			       "size changed", BM_LOCKED_MASK);
+		drbd_initialize_al(mdev, buffer);
+
+		md->flags = prev_flags;
+		drbd_md_write(mdev, buffer);
+
+		if (rs)
+			dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
+				 md->al_stripes, md->al_stripe_size_4k * 4);
 	}
 
 	if (size > la_size_sect)
-		rv = grew;
+		rv = DS_GREW;
 	if (size < la_size_sect)
-		rv = shrunk;
-out:
+		rv = DS_SHRUNK;
+
+	if (0) {
+	err_out:
+		if (rs) {
+			md->al_stripes = prev_al_stripes;
+			md->al_stripe_size_4k = prev_al_stripe_size_4k;
+			md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;
+
+			drbd_md_set_sector_offsets(mdev, mdev->ldev);
+		}
+	}
 	lc_unlock(mdev->act_log);
 	wake_up(&mdev->al_wait);
+	drbd_md_put_buffer(mdev);
 	drbd_resume_io(mdev);
 
 	return rv;
@@ -1607,11 +1672,11 @@
 	    !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
 		set_bit(USE_DEGR_WFC_T, &mdev->flags);
 
-	dd = drbd_determine_dev_size(mdev, 0);
-	if (dd == dev_size_error) {
+	dd = drbd_determine_dev_size(mdev, 0, NULL);
+	if (dd <= DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto force_diskless_dec;
-	} else if (dd == grew)
+	} else if (dd == DS_GREW)
 		set_bit(RESYNC_AFTER_NEG, &mdev->flags);
 
 	if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC) ||
@@ -2305,6 +2370,7 @@
 	struct drbd_conf *mdev;
 	enum drbd_ret_code retcode;
 	enum determine_dev_size dd;
+	bool change_al_layout = false;
 	enum dds_flags ddsf;
 	sector_t u_size;
 	int err;
@@ -2315,31 +2381,33 @@
 	if (retcode != NO_ERROR)
 		goto fail;
 
+	mdev = adm_ctx.mdev;
+	if (!get_ldev(mdev)) {
+		retcode = ERR_NO_DISK;
+		goto fail;
+	}
+
 	memset(&rs, 0, sizeof(struct resize_parms));
+	rs.al_stripes = mdev->ldev->md.al_stripes;
+	rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
 	if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
 		err = resize_parms_from_attrs(&rs, info);
 		if (err) {
 			retcode = ERR_MANDATORY_TAG;
 			drbd_msg_put_info(from_attrs_err_to_txt(err));
-			goto fail;
+			goto fail_ldev;
 		}
 	}
 
-	mdev = adm_ctx.mdev;
 	if (mdev->state.conn > C_CONNECTED) {
 		retcode = ERR_RESIZE_RESYNC;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (mdev->state.role == R_SECONDARY &&
 	    mdev->state.peer == R_SECONDARY) {
 		retcode = ERR_NO_PRIMARY;
-		goto fail;
-	}
-
-	if (!get_ldev(mdev)) {
-		retcode = ERR_NO_DISK;
-		goto fail;
+		goto fail_ldev;
 	}
 
 	if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
@@ -2358,6 +2426,28 @@
 		}
 	}
 
+	if (mdev->ldev->md.al_stripes != rs.al_stripes ||
+	    mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
+		u32 al_size_k = rs.al_stripes * rs.al_stripe_size;
+
+		if (al_size_k > (16 * 1024 * 1024)) {
+			retcode = ERR_MD_LAYOUT_TOO_BIG;
+			goto fail_ldev;
+		}
+
+		if (al_size_k < MD_32kB_SECT/2) {
+			retcode = ERR_MD_LAYOUT_TOO_SMALL;
+			goto fail_ldev;
+		}
+
+		if (mdev->state.conn != C_CONNECTED) {
+			retcode = ERR_MD_LAYOUT_CONNECTED;
+			goto fail_ldev;
+		}
+
+		change_al_layout = true;
+	}
+
 	if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
 		mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
 
@@ -2373,16 +2463,22 @@
 	}
 
 	ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
-	dd = drbd_determine_dev_size(mdev, ddsf);
+	dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
 	drbd_md_sync(mdev);
 	put_ldev(mdev);
-	if (dd == dev_size_error) {
+	if (dd == DS_ERROR) {
 		retcode = ERR_NOMEM_BITMAP;
 		goto fail;
+	} else if (dd == DS_ERROR_SPACE_MD) {
+		retcode = ERR_MD_LAYOUT_NO_FIT;
+		goto fail;
+	} else if (dd == DS_ERROR_SHRINK) {
+		retcode = ERR_IMPLICIT_SHRINK;
+		goto fail;
 	}
 
 	if (mdev->state.conn == C_CONNECTED) {
-		if (dd == grew)
+		if (dd == DS_GREW)
 			set_bit(RESIZE_PENDING, &mdev->flags);
 
 		drbd_send_uuids(mdev);
@@ -2658,7 +2754,6 @@
 		const struct sib_info *sib)
 {
 	struct state_info *si = NULL; /* for sizeof(si->member); */
-	struct net_conf *nc;
 	struct nlattr *nla;
 	int got_ldev;
 	int err = 0;
@@ -2688,13 +2783,19 @@
 		goto nla_put_failure;
 
 	rcu_read_lock();
-	if (got_ldev)
-		if (disk_conf_to_skb(skb, rcu_dereference(mdev->ldev->disk_conf), exclude_sensitive))
-			goto nla_put_failure;
+	if (got_ldev) {
+		struct disk_conf *disk_conf;
 
-	nc = rcu_dereference(mdev->tconn->net_conf);
-	if (nc)
-		err = net_conf_to_skb(skb, nc, exclude_sensitive);
+		disk_conf = rcu_dereference(mdev->ldev->disk_conf);
+		err = disk_conf_to_skb(skb, disk_conf, exclude_sensitive);
+	}
+	if (!err) {
+		struct net_conf *nc;
+
+		nc = rcu_dereference(mdev->tconn->net_conf);
+		if (nc)
+			err = net_conf_to_skb(skb, nc, exclude_sensitive);
+	}
 	rcu_read_unlock();
 	if (err)
 		goto nla_put_failure;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 4222aff..cc29cd3 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1039,6 +1039,8 @@
 	rcu_read_lock();
 	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 		kref_get(&mdev->kref);
+		rcu_read_unlock();
+
 		/* Prevent a race between resync-handshake and
 		 * being promoted to Primary.
 		 *
@@ -1049,8 +1051,6 @@
 		mutex_lock(mdev->state_mutex);
 		mutex_unlock(mdev->state_mutex);
 
-		rcu_read_unlock();
-
 		if (discard_my_data)
 			set_bit(DISCARD_MY_DATA, &mdev->flags);
 		else
@@ -3545,7 +3545,7 @@
 {
 	struct drbd_conf *mdev;
 	struct p_sizes *p = pi->data;
-	enum determine_dev_size dd = unchanged;
+	enum determine_dev_size dd = DS_UNCHANGED;
 	sector_t p_size, p_usize, my_usize;
 	int ldsc = 0; /* local disk size changed */
 	enum dds_flags ddsf;
@@ -3617,9 +3617,9 @@
 
 	ddsf = be16_to_cpu(p->dds_flags);
 	if (get_ldev(mdev)) {
-		dd = drbd_determine_dev_size(mdev, ddsf);
+		dd = drbd_determine_dev_size(mdev, ddsf, NULL);
 		put_ldev(mdev);
-		if (dd == dev_size_error)
+		if (dd == DS_ERROR)
 			return -EIO;
 		drbd_md_sync(mdev);
 	} else {
@@ -3647,7 +3647,7 @@
 			drbd_send_sizes(mdev, 0, ddsf);
 		}
 		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
-		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
+		    (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
 			if (mdev->state.pdsk >= D_INCONSISTENT &&
 			    mdev->state.disk >= D_INCONSISTENT) {
 				if (ddsf & DDSF_NO_RESYNC)
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 90c5be2..216d47b 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -1115,8 +1115,10 @@
 		drbd_thread_restart_nowait(&mdev->tconn->receiver);
 
 	/* Resume AL writing if we get a connection */
-	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
+	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
 		drbd_resume_al(mdev);
+		mdev->tconn->connect_cnt++;
+	}
 
 	/* remember last attach time so request_timer_fn() won't
 	 * kill newly established sessions while we are still trying to thaw
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 5af21f2..6e85e21 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -31,6 +31,8 @@
 #include <linux/slab.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
 
 #include <linux/genhd.h>
 #include <linux/idr.h>
@@ -39,8 +41,9 @@
 #include "rsxx_cfg.h"
 
 #define NO_LEGACY 0
+#define SYNC_START_TIMEOUT (10 * 60) /* 10 minutes */
 
-MODULE_DESCRIPTION("IBM FlashSystem 70/80 PCIe SSD Device Driver");
+MODULE_DESCRIPTION("IBM Flash Adapter 900GB Full Height Device Driver");
 MODULE_AUTHOR("Joshua Morris/Philip Kelleher, IBM");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRIVER_VERSION);
@@ -49,9 +52,282 @@
 module_param(force_legacy, uint, 0444);
 MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts");
 
+static unsigned int sync_start = 1;
+module_param(sync_start, uint, 0444);
+MODULE_PARM_DESC(sync_start, "On by Default: Driver load will not complete "
+			     "until the card startup has completed.");
+
 static DEFINE_IDA(rsxx_disk_ida);
 static DEFINE_SPINLOCK(rsxx_ida_lock);
 
+/* --------------------Debugfs Setup ------------------- */
+
+struct rsxx_cram {
+	u32 f_pos;
+	u32 offset;
+	void *i_private;
+};
+
+static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
+{
+	struct rsxx_cardinfo *card = m->private;
+
+	seq_printf(m, "HWID		0x%08x\n",
+					ioread32(card->regmap + HWID));
+	seq_printf(m, "SCRATCH		0x%08x\n",
+					ioread32(card->regmap + SCRATCH));
+	seq_printf(m, "IER		0x%08x\n",
+					ioread32(card->regmap + IER));
+	seq_printf(m, "IPR		0x%08x\n",
+					ioread32(card->regmap + IPR));
+	seq_printf(m, "CREG_CMD		0x%08x\n",
+					ioread32(card->regmap + CREG_CMD));
+	seq_printf(m, "CREG_ADD		0x%08x\n",
+					ioread32(card->regmap + CREG_ADD));
+	seq_printf(m, "CREG_CNT		0x%08x\n",
+					ioread32(card->regmap + CREG_CNT));
+	seq_printf(m, "CREG_STAT	0x%08x\n",
+					ioread32(card->regmap + CREG_STAT));
+	seq_printf(m, "CREG_DATA0	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA0));
+	seq_printf(m, "CREG_DATA1	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA1));
+	seq_printf(m, "CREG_DATA2	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA2));
+	seq_printf(m, "CREG_DATA3	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA3));
+	seq_printf(m, "CREG_DATA4	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA4));
+	seq_printf(m, "CREG_DATA5	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA5));
+	seq_printf(m, "CREG_DATA6	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA6));
+	seq_printf(m, "CREG_DATA7	0x%08x\n",
+					ioread32(card->regmap + CREG_DATA7));
+	seq_printf(m, "INTR_COAL	0x%08x\n",
+					ioread32(card->regmap + INTR_COAL));
+	seq_printf(m, "HW_ERROR		0x%08x\n",
+					ioread32(card->regmap + HW_ERROR));
+	seq_printf(m, "DEBUG0		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG0));
+	seq_printf(m, "DEBUG1		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG1));
+	seq_printf(m, "DEBUG2		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG2));
+	seq_printf(m, "DEBUG3		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG3));
+	seq_printf(m, "DEBUG4		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG4));
+	seq_printf(m, "DEBUG5		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG5));
+	seq_printf(m, "DEBUG6		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG6));
+	seq_printf(m, "DEBUG7		0x%08x\n",
+					ioread32(card->regmap + PCI_DEBUG7));
+	seq_printf(m, "RECONFIG		0x%08x\n",
+					ioread32(card->regmap + PCI_RECONFIG));
+
+	return 0;
+}
+
+static int rsxx_attr_stats_show(struct seq_file *m, void *p)
+{
+	struct rsxx_cardinfo *card = m->private;
+	int i;
+
+	for (i = 0; i < card->n_targets; i++) {
+		seq_printf(m, "Ctrl %d CRC Errors	= %d\n",
+				i, card->ctrl[i].stats.crc_errors);
+		seq_printf(m, "Ctrl %d Hard Errors	= %d\n",
+				i, card->ctrl[i].stats.hard_errors);
+		seq_printf(m, "Ctrl %d Soft Errors	= %d\n",
+				i, card->ctrl[i].stats.soft_errors);
+		seq_printf(m, "Ctrl %d Writes Issued	= %d\n",
+				i, card->ctrl[i].stats.writes_issued);
+		seq_printf(m, "Ctrl %d Writes Failed	= %d\n",
+				i, card->ctrl[i].stats.writes_failed);
+		seq_printf(m, "Ctrl %d Reads Issued	= %d\n",
+				i, card->ctrl[i].stats.reads_issued);
+		seq_printf(m, "Ctrl %d Reads Failed	= %d\n",
+				i, card->ctrl[i].stats.reads_failed);
+		seq_printf(m, "Ctrl %d Reads Retried	= %d\n",
+				i, card->ctrl[i].stats.reads_retried);
+		seq_printf(m, "Ctrl %d Discards Issued	= %d\n",
+				i, card->ctrl[i].stats.discards_issued);
+		seq_printf(m, "Ctrl %d Discards Failed	= %d\n",
+				i, card->ctrl[i].stats.discards_failed);
+		seq_printf(m, "Ctrl %d DMA SW Errors	= %d\n",
+				i, card->ctrl[i].stats.dma_sw_err);
+		seq_printf(m, "Ctrl %d DMA HW Faults	= %d\n",
+				i, card->ctrl[i].stats.dma_hw_fault);
+		seq_printf(m, "Ctrl %d DMAs Cancelled	= %d\n",
+				i, card->ctrl[i].stats.dma_cancelled);
+		seq_printf(m, "Ctrl %d SW Queue Depth	= %d\n",
+				i, card->ctrl[i].stats.sw_q_depth);
+		seq_printf(m, "Ctrl %d HW Queue Depth	= %d\n",
+			i, atomic_read(&card->ctrl[i].stats.hw_q_depth));
+	}
+
+	return 0;
+}
+
+static int rsxx_attr_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rsxx_attr_stats_show, inode->i_private);
+}
+
+static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, rsxx_attr_pci_regs_show, inode->i_private);
+}
+
+static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
+			      size_t cnt, loff_t *ppos)
+{
+	struct rsxx_cram *info = fp->private_data;
+	struct rsxx_cardinfo *card = info->i_private;
+	char *buf;
+	int st;
+
+	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	info->f_pos = (u32)*ppos + info->offset;
+
+	st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+	if (st)
+		return st;
+
+	st = copy_to_user(ubuf, buf, cnt);
+	if (st)
+		return st;
+
+	info->offset += cnt;
+
+	kfree(buf);
+
+	return cnt;
+}
+
+static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
+			       size_t cnt, loff_t *ppos)
+{
+	struct rsxx_cram *info = fp->private_data;
+	struct rsxx_cardinfo *card = info->i_private;
+	char *buf;
+	int st;
+
+	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	st = copy_from_user(buf, ubuf, cnt);
+	if (st)
+		return st;
+
+	info->f_pos = (u32)*ppos + info->offset;
+
+	st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
+	if (st)
+		return st;
+
+	info->offset += cnt;
+
+	kfree(buf);
+
+	return cnt;
+}
+
+static int rsxx_cram_open(struct inode *inode, struct file *file)
+{
+	struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return -ENOMEM;
+
+	info->i_private = inode->i_private;
+	info->f_pos = file->f_pos;
+	file->private_data = info;
+
+	return 0;
+}
+
+static int rsxx_cram_release(struct inode *inode, struct file *file)
+{
+	struct rsxx_cram *info = file->private_data;
+
+	if (!info)
+		return 0;
+
+	kfree(info);
+	file->private_data = NULL;
+
+	return 0;
+}
+
+static const struct file_operations debugfs_cram_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_cram_open,
+	.read		= rsxx_cram_read,
+	.write		= rsxx_cram_write,
+	.release	= rsxx_cram_release,
+};
+
+static const struct file_operations debugfs_stats_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_attr_stats_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static const struct file_operations debugfs_pci_regs_fops = {
+	.owner		= THIS_MODULE,
+	.open		= rsxx_attr_pci_regs_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
+{
+	struct dentry *debugfs_stats;
+	struct dentry *debugfs_pci_regs;
+	struct dentry *debugfs_cram;
+
+	card->debugfs_dir = debugfs_create_dir(card->gendisk->disk_name, NULL);
+	if (IS_ERR_OR_NULL(card->debugfs_dir))
+		goto failed_debugfs_dir;
+
+	debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+					    card->debugfs_dir, card,
+					    &debugfs_stats_fops);
+	if (IS_ERR_OR_NULL(debugfs_stats))
+		goto failed_debugfs_stats;
+
+	debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+					       card->debugfs_dir, card,
+					       &debugfs_pci_regs_fops);
+	if (IS_ERR_OR_NULL(debugfs_pci_regs))
+		goto failed_debugfs_pci_regs;
+
+	debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+					   card->debugfs_dir, card,
+					   &debugfs_cram_fops);
+	if (IS_ERR_OR_NULL(debugfs_cram))
+		goto failed_debugfs_cram;
+
+	return;
+failed_debugfs_cram:
+	debugfs_remove(debugfs_pci_regs);
+failed_debugfs_pci_regs:
+	debugfs_remove(debugfs_stats);
+failed_debugfs_stats:
+	debugfs_remove(card->debugfs_dir);
+failed_debugfs_dir:
+	card->debugfs_dir = NULL;
+}
+
 /*----------------- Interrupt Control & Handling -------------------*/
 
 static void rsxx_mask_interrupts(struct rsxx_cardinfo *card)
@@ -163,12 +439,13 @@
 		}
 
 		if (isr & CR_INTR_CREG) {
-			schedule_work(&card->creg_ctrl.done_work);
+			queue_work(card->creg_ctrl.creg_wq,
+				   &card->creg_ctrl.done_work);
 			handled++;
 		}
 
 		if (isr & CR_INTR_EVENT) {
-			schedule_work(&card->event_work);
+			queue_work(card->event_wq, &card->event_work);
 			rsxx_disable_ier_and_isr(card, CR_INTR_EVENT);
 			handled++;
 		}
@@ -329,7 +606,7 @@
 	int i;
 	int st;
 
-	dev_warn(&dev->dev, "IBM FlashSystem PCI: preparing for slot reset.\n");
+	dev_warn(&dev->dev, "IBM Flash Adapter PCI: preparing for slot reset.\n");
 
 	card->eeh_state = 1;
 	rsxx_mask_interrupts(card);
@@ -367,15 +644,26 @@
 {
 	struct rsxx_cardinfo *card = pci_get_drvdata(dev);
 	int i;
+	int cnt = 0;
 
-	dev_err(&dev->dev, "IBM FlashSystem PCI: disabling failed card.\n");
+	dev_err(&dev->dev, "IBM Flash Adapter PCI: disabling failed card.\n");
 
 	card->eeh_state = 1;
+	card->halt = 1;
 
-	for (i = 0; i < card->n_targets; i++)
-		del_timer_sync(&card->ctrl[i].activity_timer);
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_bh(&card->ctrl[i].queue_lock);
+		cnt = rsxx_cleanup_dma_queue(&card->ctrl[i],
+					     &card->ctrl[i].queue);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 
-	rsxx_eeh_cancel_dmas(card);
+		cnt += rsxx_dma_cancel(&card->ctrl[i]);
+
+		if (cnt)
+			dev_info(CARD_TO_DEV(card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, card->ctrl[i].id);
+	}
 }
 
 static int rsxx_eeh_fifo_flush_poll(struct rsxx_cardinfo *card)
@@ -432,7 +720,7 @@
 	int st;
 
 	dev_warn(&dev->dev,
-		"IBM FlashSystem PCI: recovering from slot reset.\n");
+		"IBM Flash Adapter PCI: recovering from slot reset.\n");
 
 	st = pci_enable_device(dev);
 	if (st)
@@ -485,7 +773,7 @@
 				&card->ctrl[i].issue_dma_work);
 	}
 
-	dev_info(&dev->dev, "IBM FlashSystem PCI: recovery complete.\n");
+	dev_info(&dev->dev, "IBM Flash Adapter PCI: recovery complete.\n");
 
 	return PCI_ERS_RESULT_RECOVERED;
 
@@ -528,6 +816,7 @@
 {
 	struct rsxx_cardinfo *card;
 	int st;
+	unsigned int sync_timeout;
 
 	dev_info(&dev->dev, "PCI-Flash SSD discovered\n");
 
@@ -610,7 +899,11 @@
 	}
 
 	/************* Setup Processor Command Interface *************/
-	rsxx_creg_setup(card);
+	st = rsxx_creg_setup(card);
+	if (st) {
+		dev_err(CARD_TO_DEV(card), "Failed to setup creg interface.\n");
+		goto failed_creg_setup;
+	}
 
 	spin_lock_irq(&card->irq_lock);
 	rsxx_enable_ier_and_isr(card, CR_INTR_CREG);
@@ -650,6 +943,12 @@
 	}
 
 	/************* Setup Card Event Handler *************/
+	card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
+	if (!card->event_wq) {
+		dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
+		goto failed_event_handler;
+	}
+
 	INIT_WORK(&card->event_work, card_event_handler);
 
 	st = rsxx_setup_dev(card);
@@ -676,6 +975,33 @@
 		if (st)
 			dev_crit(CARD_TO_DEV(card),
 				"Failed issuing card startup\n");
+		if (sync_start) {
+			sync_timeout = SYNC_START_TIMEOUT;
+
+			dev_info(CARD_TO_DEV(card),
+				 "Waiting for card to startup\n");
+
+			do {
+				ssleep(1);
+				sync_timeout--;
+
+				rsxx_get_card_state(card, &card->state);
+			} while (sync_timeout &&
+				(card->state == CARD_STATE_STARTING));
+
+			if (card->state == CARD_STATE_STARTING) {
+				dev_warn(CARD_TO_DEV(card),
+					 "Card startup timed out\n");
+				card->size8 = 0;
+			} else {
+				dev_info(CARD_TO_DEV(card),
+					"card state: %s\n",
+					rsxx_card_state_to_str(card->state));
+				st = rsxx_get_card_size8(card, &card->size8);
+				if (st)
+					card->size8 = 0;
+			}
+		}
 	} else if (card->state == CARD_STATE_GOOD ||
 		   card->state == CARD_STATE_RD_ONLY_FAULT) {
 		st = rsxx_get_card_size8(card, &card->size8);
@@ -685,12 +1011,21 @@
 
 	rsxx_attach_dev(card);
 
+	/************* Setup Debugfs *************/
+	rsxx_debugfs_dev_new(card);
+
 	return 0;
 
 failed_create_dev:
+	destroy_workqueue(card->event_wq);
+	card->event_wq = NULL;
+failed_event_handler:
 	rsxx_dma_destroy(card);
 failed_dma_setup:
 failed_compatiblity_check:
+	destroy_workqueue(card->creg_ctrl.creg_wq);
+	card->creg_ctrl.creg_wq = NULL;
+failed_creg_setup:
 	spin_lock_irq(&card->irq_lock);
 	rsxx_disable_ier_and_isr(card, CR_INTR_ALL);
 	spin_unlock_irq(&card->irq_lock);
@@ -756,6 +1091,8 @@
 	/* Prevent work_structs from re-queuing themselves. */
 	card->halt = 1;
 
+	debugfs_remove_recursive(card->debugfs_dir);
+
 	free_irq(dev->irq, card);
 
 	if (!force_legacy)
diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c
index 4b5c020..926dce9 100644
--- a/drivers/block/rsxx/cregs.c
+++ b/drivers/block/rsxx/cregs.c
@@ -431,6 +431,15 @@
 	*hw_stat = completion.creg_status;
 
 	if (completion.st) {
+		/*
+		* This read is needed to verify that there has not been any
+		* extreme errors that might have occurred, i.e. EEH. The
+		* function iowrite32 will not detect EEH errors, so it is
+		* necessary that we recover if such an error is the reason
+		* for the timeout. This is a dummy read.
+		*/
+		ioread32(card->regmap + SCRATCH);
+
 		dev_warn(CARD_TO_DEV(card),
 			"creg command failed(%d x%08x)\n",
 			completion.st, addr);
@@ -727,6 +736,11 @@
 {
 	card->creg_ctrl.active_cmd = NULL;
 
+	card->creg_ctrl.creg_wq =
+			create_singlethread_workqueue(DRIVER_NAME"_creg");
+	if (!card->creg_ctrl.creg_wq)
+		return -ENOMEM;
+
 	INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done);
 	mutex_init(&card->creg_ctrl.reset_lock);
 	INIT_LIST_HEAD(&card->creg_ctrl.queue);
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 4346d17..d7af441 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -155,7 +155,8 @@
 		atomic_set(&meta->error, 1);
 
 	if (atomic_dec_and_test(&meta->pending_dmas)) {
-		disk_stats_complete(card, meta->bio, meta->start_time);
+		if (!card->eeh_state && card->gendisk)
+			disk_stats_complete(card, meta->bio, meta->start_time);
 
 		bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0);
 		kmem_cache_free(bio_meta_pool, meta);
@@ -170,6 +171,12 @@
 
 	might_sleep();
 
+	if (!card)
+		goto req_err;
+
+	if (bio->bi_sector + (bio->bi_size >> 9) > get_capacity(card->gendisk))
+		goto req_err;
+
 	if (unlikely(card->halt)) {
 		st = -EFAULT;
 		goto req_err;
@@ -196,7 +203,8 @@
 	atomic_set(&bio_meta->pending_dmas, 0);
 	bio_meta->start_time = jiffies;
 
-	disk_stats_start(card, bio);
+	if (!unlikely(card->halt))
+		disk_stats_start(card, bio);
 
 	dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n",
 		 bio_data_dir(bio) ? 'W' : 'R', bio_meta,
@@ -225,24 +233,6 @@
 	return (pci_rev >= RSXX_DISCARD_SUPPORT);
 }
 
-static unsigned short rsxx_get_logical_block_size(
-					struct rsxx_cardinfo *card)
-{
-	u32 capabilities = 0;
-	int st;
-
-	st = rsxx_get_card_capabilities(card, &capabilities);
-	if (st)
-		dev_warn(CARD_TO_DEV(card),
-			"Failed reading card capabilities register\n");
-
-	/* Earlier firmware did not have support for 512 byte accesses */
-	if (capabilities & CARD_CAP_SUBPAGE_WRITES)
-		return 512;
-	else
-		return RSXX_HW_BLK_SIZE;
-}
-
 int rsxx_attach_dev(struct rsxx_cardinfo *card)
 {
 	mutex_lock(&card->dev_lock);
@@ -305,7 +295,7 @@
 		return -ENOMEM;
 	}
 
-	blk_size = rsxx_get_logical_block_size(card);
+	blk_size = card->config.data.block_size;
 
 	blk_queue_make_request(card->queue, rsxx_make_request);
 	blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY);
@@ -347,6 +337,7 @@
 	card->gendisk = NULL;
 
 	blk_cleanup_queue(card->queue);
+	card->queue->queuedata = NULL;
 	unregister_blkdev(card->major, DRIVER_NAME);
 }
 
diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c
index 0607513..bed32f1 100644
--- a/drivers/block/rsxx/dma.c
+++ b/drivers/block/rsxx/dma.c
@@ -245,6 +245,22 @@
 	kmem_cache_free(rsxx_dma_pool, dma);
 }
 
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl,
+			   struct list_head *q)
+{
+	struct rsxx_dma *dma;
+	struct rsxx_dma *tmp;
+	int cnt = 0;
+
+	list_for_each_entry_safe(dma, tmp, q, list) {
+		list_del(&dma->list);
+		rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+		cnt++;
+	}
+
+	return cnt;
+}
+
 static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl,
 				 struct rsxx_dma *dma)
 {
@@ -252,9 +268,10 @@
 	 * Requeued DMAs go to the front of the queue so they are issued
 	 * first.
 	 */
-	spin_lock(&ctrl->queue_lock);
+	spin_lock_bh(&ctrl->queue_lock);
+	ctrl->stats.sw_q_depth++;
 	list_add(&dma->list, &ctrl->queue);
-	spin_unlock(&ctrl->queue_lock);
+	spin_unlock_bh(&ctrl->queue_lock);
 }
 
 static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl,
@@ -329,6 +346,7 @@
 static void dma_engine_stalled(unsigned long data)
 {
 	struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data;
+	int cnt;
 
 	if (atomic_read(&ctrl->stats.hw_q_depth) == 0 ||
 	    unlikely(ctrl->card->eeh_state))
@@ -349,18 +367,28 @@
 			"DMA channel %d has stalled, faulting interface.\n",
 			ctrl->id);
 		ctrl->card->dma_fault = 1;
+
+		/* Clean up the DMA queue */
+		spin_lock(&ctrl->queue_lock);
+		cnt = rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+		spin_unlock(&ctrl->queue_lock);
+
+		cnt += rsxx_dma_cancel(ctrl);
+
+		if (cnt)
+			dev_info(CARD_TO_DEV(ctrl->card),
+				"Freed %d queued DMAs on channel %d\n",
+				cnt, ctrl->id);
 	}
 }
 
-static void rsxx_issue_dmas(struct work_struct *work)
+static void rsxx_issue_dmas(struct rsxx_dma_ctrl *ctrl)
 {
-	struct rsxx_dma_ctrl *ctrl;
 	struct rsxx_dma *dma;
 	int tag;
 	int cmds_pending = 0;
 	struct hw_cmd *hw_cmd_buf;
 
-	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
 	hw_cmd_buf = ctrl->cmd.buf;
 
 	if (unlikely(ctrl->card->halt) ||
@@ -368,22 +396,22 @@
 		return;
 
 	while (1) {
-		spin_lock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
 		if (list_empty(&ctrl->queue)) {
-			spin_unlock(&ctrl->queue_lock);
+			spin_unlock_bh(&ctrl->queue_lock);
 			break;
 		}
-		spin_unlock(&ctrl->queue_lock);
+		spin_unlock_bh(&ctrl->queue_lock);
 
 		tag = pop_tracker(ctrl->trackers);
 		if (tag == -1)
 			break;
 
-		spin_lock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
 		dma = list_entry(ctrl->queue.next, struct rsxx_dma, list);
 		list_del(&dma->list);
 		ctrl->stats.sw_q_depth--;
-		spin_unlock(&ctrl->queue_lock);
+		spin_unlock_bh(&ctrl->queue_lock);
 
 		/*
 		 * This will catch any DMAs that slipped in right before the
@@ -440,9 +468,8 @@
 	}
 }
 
-static void rsxx_dma_done(struct work_struct *work)
+static void rsxx_dma_done(struct rsxx_dma_ctrl *ctrl)
 {
-	struct rsxx_dma_ctrl *ctrl;
 	struct rsxx_dma *dma;
 	unsigned long flags;
 	u16 count;
@@ -450,7 +477,6 @@
 	u8 tag;
 	struct hw_status *hw_st_buf;
 
-	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
 	hw_st_buf = ctrl->status.buf;
 
 	if (unlikely(ctrl->card->halt) ||
@@ -520,33 +546,32 @@
 	rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id));
 	spin_unlock_irqrestore(&ctrl->card->irq_lock, flags);
 
-	spin_lock(&ctrl->queue_lock);
+	spin_lock_bh(&ctrl->queue_lock);
 	if (ctrl->stats.sw_q_depth)
 		queue_work(ctrl->issue_wq, &ctrl->issue_dma_work);
-	spin_unlock(&ctrl->queue_lock);
+	spin_unlock_bh(&ctrl->queue_lock);
 }
 
-static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card,
-				      struct list_head *q)
+static void rsxx_schedule_issue(struct work_struct *work)
 {
-	struct rsxx_dma *dma;
-	struct rsxx_dma *tmp;
-	int cnt = 0;
+	struct rsxx_dma_ctrl *ctrl;
 
-	list_for_each_entry_safe(dma, tmp, q, list) {
-		list_del(&dma->list);
+	ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work);
 
-		if (dma->dma_addr)
-			pci_unmap_page(card->dev, dma->dma_addr,
-				       get_dma_size(dma),
-				       (dma->cmd == HW_CMD_BLK_WRITE) ?
-				       PCI_DMA_TODEVICE :
-				       PCI_DMA_FROMDEVICE);
-		kmem_cache_free(rsxx_dma_pool, dma);
-		cnt++;
-	}
+	mutex_lock(&ctrl->work_lock);
+	rsxx_issue_dmas(ctrl);
+	mutex_unlock(&ctrl->work_lock);
+}
 
-	return cnt;
+static void rsxx_schedule_done(struct work_struct *work)
+{
+	struct rsxx_dma_ctrl *ctrl;
+
+	ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work);
+
+	mutex_lock(&ctrl->work_lock);
+	rsxx_dma_done(ctrl);
+	mutex_unlock(&ctrl->work_lock);
 }
 
 static int rsxx_queue_discard(struct rsxx_cardinfo *card,
@@ -698,10 +723,10 @@
 
 	for (i = 0; i < card->n_targets; i++) {
 		if (!list_empty(&dma_list[i])) {
-			spin_lock(&card->ctrl[i].queue_lock);
+			spin_lock_bh(&card->ctrl[i].queue_lock);
 			card->ctrl[i].stats.sw_q_depth += dma_cnt[i];
 			list_splice_tail(&dma_list[i], &card->ctrl[i].queue);
-			spin_unlock(&card->ctrl[i].queue_lock);
+			spin_unlock_bh(&card->ctrl[i].queue_lock);
 
 			queue_work(card->ctrl[i].issue_wq,
 				   &card->ctrl[i].issue_dma_work);
@@ -711,8 +736,11 @@
 	return 0;
 
 bvec_err:
-	for (i = 0; i < card->n_targets; i++)
-		rsxx_cleanup_dma_queue(card, &dma_list[i]);
+	for (i = 0; i < card->n_targets; i++) {
+		spin_lock_bh(&card->ctrl[i].queue_lock);
+		rsxx_cleanup_dma_queue(&card->ctrl[i], &dma_list[i]);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
+	}
 
 	return st;
 }
@@ -780,6 +808,7 @@
 	spin_lock_init(&ctrl->trackers->lock);
 
 	spin_lock_init(&ctrl->queue_lock);
+	mutex_init(&ctrl->work_lock);
 	INIT_LIST_HEAD(&ctrl->queue);
 
 	setup_timer(&ctrl->activity_timer, dma_engine_stalled,
@@ -793,8 +822,8 @@
 	if (!ctrl->done_wq)
 		return -ENOMEM;
 
-	INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas);
-	INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done);
+	INIT_WORK(&ctrl->issue_dma_work, rsxx_schedule_issue);
+	INIT_WORK(&ctrl->dma_done_work, rsxx_schedule_done);
 
 	st = rsxx_hw_buffers_init(dev, ctrl);
 	if (st)
@@ -918,13 +947,30 @@
 	return st;
 }
 
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl)
+{
+	struct rsxx_dma *dma;
+	int i;
+	int cnt = 0;
+
+	/* Clean up issued DMAs */
+	for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) {
+		dma = get_tracker_dma(ctrl->trackers, i);
+		if (dma) {
+			atomic_dec(&ctrl->stats.hw_q_depth);
+			rsxx_complete_dma(ctrl, dma, DMA_CANCELLED);
+			push_tracker(ctrl->trackers, i);
+			cnt++;
+		}
+	}
+
+	return cnt;
+}
 
 void rsxx_dma_destroy(struct rsxx_cardinfo *card)
 {
 	struct rsxx_dma_ctrl *ctrl;
-	struct rsxx_dma *dma;
-	int i, j;
-	int cnt = 0;
+	int i;
 
 	for (i = 0; i < card->n_targets; i++) {
 		ctrl = &card->ctrl[i];
@@ -943,33 +989,11 @@
 			del_timer_sync(&ctrl->activity_timer);
 
 		/* Clean up the DMA queue */
-		spin_lock(&ctrl->queue_lock);
-		cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue);
-		spin_unlock(&ctrl->queue_lock);
+		spin_lock_bh(&ctrl->queue_lock);
+		rsxx_cleanup_dma_queue(ctrl, &ctrl->queue);
+		spin_unlock_bh(&ctrl->queue_lock);
 
-		if (cnt)
-			dev_info(CARD_TO_DEV(card),
-				"Freed %d queued DMAs on channel %d\n",
-				cnt, i);
-
-		/* Clean up issued DMAs */
-		for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) {
-			dma = get_tracker_dma(ctrl->trackers, j);
-			if (dma) {
-				pci_unmap_page(card->dev, dma->dma_addr,
-					       get_dma_size(dma),
-					       (dma->cmd == HW_CMD_BLK_WRITE) ?
-					       PCI_DMA_TODEVICE :
-					       PCI_DMA_FROMDEVICE);
-				kmem_cache_free(rsxx_dma_pool, dma);
-				cnt++;
-			}
-		}
-
-		if (cnt)
-			dev_info(CARD_TO_DEV(card),
-				"Freed %d pending DMAs on channel %d\n",
-				cnt, i);
+		rsxx_dma_cancel(ctrl);
 
 		vfree(ctrl->trackers);
 
@@ -1013,7 +1037,7 @@
 			cnt++;
 		}
 
-		spin_lock(&card->ctrl[i].queue_lock);
+		spin_lock_bh(&card->ctrl[i].queue_lock);
 		list_splice(&issued_dmas[i], &card->ctrl[i].queue);
 
 		atomic_sub(cnt, &card->ctrl[i].stats.hw_q_depth);
@@ -1028,7 +1052,7 @@
 					       PCI_DMA_TODEVICE :
 					       PCI_DMA_FROMDEVICE);
 		}
-		spin_unlock(&card->ctrl[i].queue_lock);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 	}
 
 	kfree(issued_dmas);
@@ -1036,30 +1060,13 @@
 	return 0;
 }
 
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card)
-{
-	struct rsxx_dma *dma;
-	struct rsxx_dma *tmp;
-	int i;
-
-	for (i = 0; i < card->n_targets; i++) {
-		spin_lock(&card->ctrl[i].queue_lock);
-		list_for_each_entry_safe(dma, tmp, &card->ctrl[i].queue, list) {
-			list_del(&dma->list);
-
-			rsxx_complete_dma(&card->ctrl[i], dma, DMA_CANCELLED);
-		}
-		spin_unlock(&card->ctrl[i].queue_lock);
-	}
-}
-
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card)
 {
 	struct rsxx_dma *dma;
 	int i;
 
 	for (i = 0; i < card->n_targets; i++) {
-		spin_lock(&card->ctrl[i].queue_lock);
+		spin_lock_bh(&card->ctrl[i].queue_lock);
 		list_for_each_entry(dma, &card->ctrl[i].queue, list) {
 			dma->dma_addr = pci_map_page(card->dev, dma->page,
 					dma->pg_off, get_dma_size(dma),
@@ -1067,12 +1074,12 @@
 					PCI_DMA_TODEVICE :
 					PCI_DMA_FROMDEVICE);
 			if (!dma->dma_addr) {
-				spin_unlock(&card->ctrl[i].queue_lock);
+				spin_unlock_bh(&card->ctrl[i].queue_lock);
 				kmem_cache_free(rsxx_dma_pool, dma);
 				return -ENOMEM;
 			}
 		}
-		spin_unlock(&card->ctrl[i].queue_lock);
+		spin_unlock_bh(&card->ctrl[i].queue_lock);
 	}
 
 	return 0;
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 382e8bf..5ad5055 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -39,6 +39,7 @@
 #include <linux/vmalloc.h>
 #include <linux/timer.h>
 #include <linux/ioctl.h>
+#include <linux/delay.h>
 
 #include "rsxx.h"
 #include "rsxx_cfg.h"
@@ -114,6 +115,7 @@
 	struct timer_list		activity_timer;
 	struct dma_tracker_list		*trackers;
 	struct rsxx_dma_stats		stats;
+	struct mutex			work_lock;
 };
 
 struct rsxx_cardinfo {
@@ -134,6 +136,7 @@
 		spinlock_t		lock;
 		bool			active;
 		struct creg_cmd		*active_cmd;
+		struct workqueue_struct	*creg_wq;
 		struct work_struct	done_work;
 		struct list_head	queue;
 		unsigned int		q_depth;
@@ -154,6 +157,7 @@
 		int buf_len;
 	} log;
 
+	struct workqueue_struct	*event_wq;
 	struct work_struct	event_work;
 	unsigned int		state;
 	u64			size8;
@@ -181,6 +185,8 @@
 
 	int			n_targets;
 	struct rsxx_dma_ctrl	*ctrl;
+
+	struct dentry		*debugfs_dir;
 };
 
 enum rsxx_pci_regmap {
@@ -283,6 +289,7 @@
 	CREG_ADD_CAPABILITIES		= 0x80001050,
 	CREG_ADD_LOG			= 0x80002000,
 	CREG_ADD_NUM_TARGETS		= 0x80003000,
+	CREG_ADD_CRAM			= 0xA0000000,
 	CREG_ADD_CONFIG			= 0xB0000000,
 };
 
@@ -372,6 +379,8 @@
 int rsxx_dma_setup(struct rsxx_cardinfo *card);
 void rsxx_dma_destroy(struct rsxx_cardinfo *card);
 int rsxx_dma_init(void);
+int rsxx_cleanup_dma_queue(struct rsxx_dma_ctrl *ctrl, struct list_head *q);
+int rsxx_dma_cancel(struct rsxx_dma_ctrl *ctrl);
 void rsxx_dma_cleanup(void);
 void rsxx_dma_queue_reset(struct rsxx_cardinfo *card);
 int rsxx_dma_configure(struct rsxx_cardinfo *card);
@@ -382,7 +391,6 @@
 			   void *cb_data);
 int rsxx_hw_buffers_init(struct pci_dev *dev, struct rsxx_dma_ctrl *ctrl);
 int rsxx_eeh_save_issued_dmas(struct rsxx_cardinfo *card);
-void rsxx_eeh_cancel_dmas(struct rsxx_cardinfo *card);
 int rsxx_eeh_remap_dmas(struct rsxx_cardinfo *card);
 
 /***** cregs.c *****/
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index dd5b2fe..bf4b9d2 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -50,110 +50,118 @@
 #include "common.h"
 
 /*
- * These are rather arbitrary. They are fairly large because adjacent requests
- * pulled from a communication ring are quite likely to end up being part of
- * the same scatter/gather request at the disc.
+ * Maximum number of unused free pages to keep in the internal buffer.
+ * Setting this to a value too low will reduce memory used in each backend,
+ * but can have a performance penalty.
  *
- * ** TRY INCREASING 'xen_blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
- *
- * This will increase the chances of being able to write whole tracks.
- * 64 should be enough to keep us competitive with Linux.
+ * A sane value is xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST, but can
+ * be set to a lower value that might degrade performance on some intensive
+ * IO workloads.
  */
-static int xen_blkif_reqs = 64;
-module_param_named(reqs, xen_blkif_reqs, int, 0);
-MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
+
+static int xen_blkif_max_buffer_pages = 1024;
+module_param_named(max_buffer_pages, xen_blkif_max_buffer_pages, int, 0644);
+MODULE_PARM_DESC(max_buffer_pages,
+"Maximum number of free pages to keep in each block backend buffer");
+
+/*
+ * Maximum number of grants to map persistently in blkback. For maximum
+ * performance this should be the total numbers of grants that can be used
+ * to fill the ring, but since this might become too high, specially with
+ * the use of indirect descriptors, we set it to a value that provides good
+ * performance without using too much memory.
+ *
+ * When the list of persistent grants is full we clean it up using a LRU
+ * algorithm.
+ */
+
+static int xen_blkif_max_pgrants = 1056;
+module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
+MODULE_PARM_DESC(max_persistent_grants,
+                 "Maximum number of grants to map persistently");
+
+/*
+ * The LRU mechanism to clean the lists of persistent grants needs to
+ * be executed periodically. The time interval between consecutive executions
+ * of the purge mechanism is set in ms.
+ */
+#define LRU_INTERVAL 100
+
+/*
+ * When the persistent grants list is full we will remove unused grants
+ * from the list. The percent number of grants to be removed at each LRU
+ * execution.
+ */
+#define LRU_PERCENT_CLEAN 5
 
 /* Run-time switchable: /sys/module/blkback/parameters/ */
 static unsigned int log_stats;
 module_param(log_stats, int, 0644);
 
-/*
- * Each outstanding request that we've passed to the lower device layers has a
- * 'pending_req' allocated to it. Each buffer_head that completes decrements
- * the pendcnt towards zero. When it hits zero, the specified domain has a
- * response queued for it, with the saved 'id' passed back.
- */
-struct pending_req {
-	struct xen_blkif	*blkif;
-	u64			id;
-	int			nr_pages;
-	atomic_t		pendcnt;
-	unsigned short		operation;
-	int			status;
-	struct list_head	free_list;
-	DECLARE_BITMAP(unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-};
-
 #define BLKBACK_INVALID_HANDLE (~0)
 
-struct xen_blkbk {
-	struct pending_req	*pending_reqs;
-	/* List of all 'pending_req' available */
-	struct list_head	pending_free;
-	/* And its spinlock. */
-	spinlock_t		pending_free_lock;
-	wait_queue_head_t	pending_free_wq;
-	/* The list of all pages that are available. */
-	struct page		**pending_pages;
-	/* And the grant handles that are available. */
-	grant_handle_t		*pending_grant_handles;
-};
+/* Number of free pages to remove on each call to free_xenballooned_pages */
+#define NUM_BATCH_FREE_PAGES 10
 
-static struct xen_blkbk *blkbk;
-
-/*
- * Maximum number of grant pages that can be mapped in blkback.
- * BLKIF_MAX_SEGMENTS_PER_REQUEST * RING_SIZE is the maximum number of
- * pages that blkback will persistently map.
- * Currently, this is:
- * RING_SIZE = 32 (for all known ring types)
- * BLKIF_MAX_SEGMENTS_PER_REQUEST = 11
- * sizeof(struct persistent_gnt) = 48
- * So the maximum memory used to store the grants is:
- * 32 * 11 * 48 = 16896 bytes
- */
-static inline unsigned int max_mapped_grant_pages(enum blkif_protocol protocol)
+static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
 {
-	switch (protocol) {
-	case BLKIF_PROTOCOL_NATIVE:
-		return __CONST_RING_SIZE(blkif, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	case BLKIF_PROTOCOL_X86_32:
-		return __CONST_RING_SIZE(blkif_x86_32, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	case BLKIF_PROTOCOL_X86_64:
-		return __CONST_RING_SIZE(blkif_x86_64, PAGE_SIZE) *
-			   BLKIF_MAX_SEGMENTS_PER_REQUEST;
-	default:
-		BUG();
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	if (list_empty(&blkif->free_pages)) {
+		BUG_ON(blkif->free_pages_num != 0);
+		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+		return alloc_xenballooned_pages(1, page, false);
 	}
+	BUG_ON(blkif->free_pages_num == 0);
+	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
+	list_del(&page[0]->lru);
+	blkif->free_pages_num--;
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+
 	return 0;
 }
 
-
-/*
- * Little helpful macro to figure out the index and virtual address of the
- * pending_pages[..]. For each 'pending_req' we have have up to
- * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through
- * 10 and would index in the pending_pages[..].
- */
-static inline int vaddr_pagenr(struct pending_req *req, int seg)
+static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
+                                  int num)
 {
-	return (req - blkbk->pending_reqs) *
-		BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	for (i = 0; i < num; i++)
+		list_add(&page[i]->lru, &blkif->free_pages);
+	blkif->free_pages_num += num;
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
 }
 
-#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)]
-
-static inline unsigned long vaddr(struct pending_req *req, int seg)
+static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
 {
-	unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg));
-	return (unsigned long)pfn_to_kaddr(pfn);
+	/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
+	struct page *page[NUM_BATCH_FREE_PAGES];
+	unsigned int num_pages = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&blkif->free_pages_lock, flags);
+	while (blkif->free_pages_num > num) {
+		BUG_ON(list_empty(&blkif->free_pages));
+		page[num_pages] = list_first_entry(&blkif->free_pages,
+		                                   struct page, lru);
+		list_del(&page[num_pages]->lru);
+		blkif->free_pages_num--;
+		if (++num_pages == NUM_BATCH_FREE_PAGES) {
+			spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+			free_xenballooned_pages(num_pages, page);
+			spin_lock_irqsave(&blkif->free_pages_lock, flags);
+			num_pages = 0;
+		}
+	}
+	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
+	if (num_pages != 0)
+		free_xenballooned_pages(num_pages, page);
 }
 
-#define pending_handle(_req, _seg) \
-	(blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)])
-
+#define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
 
 static int do_block_io_op(struct xen_blkif *blkif);
 static int dispatch_rw_block_io(struct xen_blkif *blkif,
@@ -170,13 +178,29 @@
 	     (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL)
 
 
-static void add_persistent_gnt(struct rb_root *root,
+/*
+ * We don't need locking around the persistent grant helpers
+ * because blkback uses a single-thread for each backed, so we
+ * can be sure that this functions will never be called recursively.
+ *
+ * The only exception to that is put_persistent_grant, that can be called
+ * from interrupt context (by xen_blkbk_unmap), so we have to use atomic
+ * bit operations to modify the flags of a persistent grant and to count
+ * the number of used grants.
+ */
+static int add_persistent_gnt(struct xen_blkif *blkif,
 			       struct persistent_gnt *persistent_gnt)
 {
-	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct rb_node **new = NULL, *parent = NULL;
 	struct persistent_gnt *this;
 
+	if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
+		if (!blkif->vbd.overflow_max_grants)
+			blkif->vbd.overflow_max_grants = 1;
+		return -EBUSY;
+	}
 	/* Figure out where to put new node */
+	new = &blkif->persistent_gnts.rb_node;
 	while (*new) {
 		this = container_of(*new, struct persistent_gnt, node);
 
@@ -186,22 +210,28 @@
 		else if (persistent_gnt->gnt > this->gnt)
 			new = &((*new)->rb_right);
 		else {
-			pr_alert(DRV_PFX " trying to add a gref that's already in the tree\n");
-			BUG();
+			pr_alert_ratelimited(DRV_PFX " trying to add a gref that's already in the tree\n");
+			return -EINVAL;
 		}
 	}
 
+	bitmap_zero(persistent_gnt->flags, PERSISTENT_GNT_FLAGS_SIZE);
+	set_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
 	/* Add new node and rebalance tree. */
 	rb_link_node(&(persistent_gnt->node), parent, new);
-	rb_insert_color(&(persistent_gnt->node), root);
+	rb_insert_color(&(persistent_gnt->node), &blkif->persistent_gnts);
+	blkif->persistent_gnt_c++;
+	atomic_inc(&blkif->persistent_gnt_in_use);
+	return 0;
 }
 
-static struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
+static struct persistent_gnt *get_persistent_gnt(struct xen_blkif *blkif,
 						 grant_ref_t gref)
 {
 	struct persistent_gnt *data;
-	struct rb_node *node = root->rb_node;
+	struct rb_node *node = NULL;
 
+	node = blkif->persistent_gnts.rb_node;
 	while (node) {
 		data = container_of(node, struct persistent_gnt, node);
 
@@ -209,13 +239,31 @@
 			node = node->rb_left;
 		else if (gref > data->gnt)
 			node = node->rb_right;
-		else
+		else {
+			if(test_bit(PERSISTENT_GNT_ACTIVE, data->flags)) {
+				pr_alert_ratelimited(DRV_PFX " requesting a grant already in use\n");
+				return NULL;
+			}
+			set_bit(PERSISTENT_GNT_ACTIVE, data->flags);
+			atomic_inc(&blkif->persistent_gnt_in_use);
 			return data;
+		}
 	}
 	return NULL;
 }
 
-static void free_persistent_gnts(struct rb_root *root, unsigned int num)
+static void put_persistent_gnt(struct xen_blkif *blkif,
+                               struct persistent_gnt *persistent_gnt)
+{
+	if(!test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+	          pr_alert_ratelimited(DRV_PFX " freeing a grant already unused");
+	set_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+	clear_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags);
+	atomic_dec(&blkif->persistent_gnt_in_use);
+}
+
+static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
+                                 unsigned int num)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
@@ -240,7 +288,7 @@
 			ret = gnttab_unmap_refs(unmap, NULL, pages,
 				segs_to_unmap);
 			BUG_ON(ret);
-			free_xenballooned_pages(segs_to_unmap, pages);
+			put_free_pages(blkif, pages, segs_to_unmap);
 			segs_to_unmap = 0;
 		}
 
@@ -251,21 +299,148 @@
 	BUG_ON(num != 0);
 }
 
+static void unmap_purged_grants(struct work_struct *work)
+{
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct persistent_gnt *persistent_gnt;
+	int ret, segs_to_unmap = 0;
+	struct xen_blkif *blkif = container_of(work, typeof(*blkif), persistent_purge_work);
+
+	while(!list_empty(&blkif->persistent_purge_list)) {
+		persistent_gnt = list_first_entry(&blkif->persistent_purge_list,
+		                                  struct persistent_gnt,
+		                                  remove_node);
+		list_del(&persistent_gnt->remove_node);
+
+		gnttab_set_unmap_op(&unmap[segs_to_unmap],
+			vaddr(persistent_gnt->page),
+			GNTMAP_host_map,
+			persistent_gnt->handle);
+
+		pages[segs_to_unmap] = persistent_gnt->page;
+
+		if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			ret = gnttab_unmap_refs(unmap, NULL, pages,
+				segs_to_unmap);
+			BUG_ON(ret);
+			put_free_pages(blkif, pages, segs_to_unmap);
+			segs_to_unmap = 0;
+		}
+		kfree(persistent_gnt);
+	}
+	if (segs_to_unmap > 0) {
+		ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap);
+		BUG_ON(ret);
+		put_free_pages(blkif, pages, segs_to_unmap);
+	}
+}
+
+static void purge_persistent_gnt(struct xen_blkif *blkif)
+{
+	struct persistent_gnt *persistent_gnt;
+	struct rb_node *n;
+	unsigned int num_clean, total;
+	bool scan_used = false, clean_used = false;
+	struct rb_root *root;
+
+	if (blkif->persistent_gnt_c < xen_blkif_max_pgrants ||
+	    (blkif->persistent_gnt_c == xen_blkif_max_pgrants &&
+	    !blkif->vbd.overflow_max_grants)) {
+		return;
+	}
+
+	if (work_pending(&blkif->persistent_purge_work)) {
+		pr_alert_ratelimited(DRV_PFX "Scheduled work from previous purge is still pending, cannot purge list\n");
+		return;
+	}
+
+	num_clean = (xen_blkif_max_pgrants / 100) * LRU_PERCENT_CLEAN;
+	num_clean = blkif->persistent_gnt_c - xen_blkif_max_pgrants + num_clean;
+	num_clean = min(blkif->persistent_gnt_c, num_clean);
+	if ((num_clean == 0) ||
+	    (num_clean > (blkif->persistent_gnt_c - atomic_read(&blkif->persistent_gnt_in_use))))
+		return;
+
+	/*
+	 * At this point, we can assure that there will be no calls
+         * to get_persistent_grant (because we are executing this code from
+         * xen_blkif_schedule), there can only be calls to put_persistent_gnt,
+         * which means that the number of currently used grants will go down,
+         * but never up, so we will always be able to remove the requested
+         * number of grants.
+	 */
+
+	total = num_clean;
+
+	pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean);
+
+	INIT_LIST_HEAD(&blkif->persistent_purge_list);
+	root = &blkif->persistent_gnts;
+purge_list:
+	foreach_grant_safe(persistent_gnt, n, root, node) {
+		BUG_ON(persistent_gnt->handle ==
+			BLKBACK_INVALID_HANDLE);
+
+		if (clean_used) {
+			clear_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags);
+			continue;
+		}
+
+		if (test_bit(PERSISTENT_GNT_ACTIVE, persistent_gnt->flags))
+			continue;
+		if (!scan_used &&
+		    (test_bit(PERSISTENT_GNT_WAS_ACTIVE, persistent_gnt->flags)))
+			continue;
+
+		rb_erase(&persistent_gnt->node, root);
+		list_add(&persistent_gnt->remove_node,
+		         &blkif->persistent_purge_list);
+		if (--num_clean == 0)
+			goto finished;
+	}
+	/*
+	 * If we get here it means we also need to start cleaning
+	 * grants that were used since last purge in order to cope
+	 * with the requested num
+	 */
+	if (!scan_used && !clean_used) {
+		pr_debug(DRV_PFX "Still missing %u purged frames\n", num_clean);
+		scan_used = true;
+		goto purge_list;
+	}
+finished:
+	if (!clean_used) {
+		pr_debug(DRV_PFX "Finished scanning for grants to clean, removing used flag\n");
+		clean_used = true;
+		goto purge_list;
+	}
+
+	blkif->persistent_gnt_c -= (total - num_clean);
+	blkif->vbd.overflow_max_grants = 0;
+
+	/* We can defer this work */
+	INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants);
+	schedule_work(&blkif->persistent_purge_work);
+	pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total);
+	return;
+}
+
 /*
  * Retrieve from the 'pending_reqs' a free pending_req structure to be used.
  */
-static struct pending_req *alloc_req(void)
+static struct pending_req *alloc_req(struct xen_blkif *blkif)
 {
 	struct pending_req *req = NULL;
 	unsigned long flags;
 
-	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-	if (!list_empty(&blkbk->pending_free)) {
-		req = list_entry(blkbk->pending_free.next, struct pending_req,
+	spin_lock_irqsave(&blkif->pending_free_lock, flags);
+	if (!list_empty(&blkif->pending_free)) {
+		req = list_entry(blkif->pending_free.next, struct pending_req,
 				 free_list);
 		list_del(&req->free_list);
 	}
-	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+	spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
 	return req;
 }
 
@@ -273,17 +448,17 @@
  * Return the 'pending_req' structure back to the freepool. We also
  * wake up the thread if it was waiting for a free page.
  */
-static void free_req(struct pending_req *req)
+static void free_req(struct xen_blkif *blkif, struct pending_req *req)
 {
 	unsigned long flags;
 	int was_empty;
 
-	spin_lock_irqsave(&blkbk->pending_free_lock, flags);
-	was_empty = list_empty(&blkbk->pending_free);
-	list_add(&req->free_list, &blkbk->pending_free);
-	spin_unlock_irqrestore(&blkbk->pending_free_lock, flags);
+	spin_lock_irqsave(&blkif->pending_free_lock, flags);
+	was_empty = list_empty(&blkif->pending_free);
+	list_add(&req->free_list, &blkif->pending_free);
+	spin_unlock_irqrestore(&blkif->pending_free_lock, flags);
 	if (was_empty)
-		wake_up(&blkbk->pending_free_wq);
+		wake_up(&blkif->pending_free_wq);
 }
 
 /*
@@ -382,10 +557,12 @@
 static void print_stats(struct xen_blkif *blkif)
 {
 	pr_info("xen-blkback (%s): oo %3llu  |  rd %4llu  |  wr %4llu  |  f %4llu"
-		 "  |  ds %4llu\n",
+		 "  |  ds %4llu | pg: %4u/%4d\n",
 		 current->comm, blkif->st_oo_req,
 		 blkif->st_rd_req, blkif->st_wr_req,
-		 blkif->st_f_req, blkif->st_ds_req);
+		 blkif->st_f_req, blkif->st_ds_req,
+		 blkif->persistent_gnt_c,
+		 xen_blkif_max_pgrants);
 	blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
 	blkif->st_rd_req = 0;
 	blkif->st_wr_req = 0;
@@ -397,6 +574,8 @@
 {
 	struct xen_blkif *blkif = arg;
 	struct xen_vbd *vbd = &blkif->vbd;
+	unsigned long timeout;
+	int ret;
 
 	xen_blkif_get(blkif);
 
@@ -406,27 +585,52 @@
 		if (unlikely(vbd->size != vbd_sz(vbd)))
 			xen_vbd_resize(blkif);
 
-		wait_event_interruptible(
+		timeout = msecs_to_jiffies(LRU_INTERVAL);
+
+		timeout = wait_event_interruptible_timeout(
 			blkif->wq,
-			blkif->waiting_reqs || kthread_should_stop());
-		wait_event_interruptible(
-			blkbk->pending_free_wq,
-			!list_empty(&blkbk->pending_free) ||
-			kthread_should_stop());
+			blkif->waiting_reqs || kthread_should_stop(),
+			timeout);
+		if (timeout == 0)
+			goto purge_gnt_list;
+		timeout = wait_event_interruptible_timeout(
+			blkif->pending_free_wq,
+			!list_empty(&blkif->pending_free) ||
+			kthread_should_stop(),
+			timeout);
+		if (timeout == 0)
+			goto purge_gnt_list;
 
 		blkif->waiting_reqs = 0;
 		smp_mb(); /* clear flag *before* checking for work */
 
-		if (do_block_io_op(blkif))
+		ret = do_block_io_op(blkif);
+		if (ret > 0)
 			blkif->waiting_reqs = 1;
+		if (ret == -EACCES)
+			wait_event_interruptible(blkif->shutdown_wq,
+						 kthread_should_stop());
+
+purge_gnt_list:
+		if (blkif->vbd.feature_gnt_persistent &&
+		    time_after(jiffies, blkif->next_lru)) {
+			purge_persistent_gnt(blkif);
+			blkif->next_lru = jiffies + msecs_to_jiffies(LRU_INTERVAL);
+		}
+
+		/* Shrink if we have more than xen_blkif_max_buffer_pages */
+		shrink_free_pagepool(blkif, xen_blkif_max_buffer_pages);
 
 		if (log_stats && time_after(jiffies, blkif->st_print))
 			print_stats(blkif);
 	}
 
+	/* Since we are shutting down remove all pages from the buffer */
+	shrink_free_pagepool(blkif, 0 /* All */);
+
 	/* Free all persistent grant pages */
 	if (!RB_EMPTY_ROOT(&blkif->persistent_gnts))
-		free_persistent_gnts(&blkif->persistent_gnts,
+		free_persistent_gnts(blkif, &blkif->persistent_gnts,
 			blkif->persistent_gnt_c);
 
 	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
@@ -441,148 +645,98 @@
 	return 0;
 }
 
-struct seg_buf {
-	unsigned int offset;
-	unsigned int nsec;
-};
 /*
  * Unmap the grant references, and also remove the M2P over-rides
  * used in the 'pending_req'.
  */
-static void xen_blkbk_unmap(struct pending_req *req)
+static void xen_blkbk_unmap(struct xen_blkif *blkif,
+                            struct grant_page *pages[],
+                            int num)
 {
 	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct page *unmap_pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int i, invcount = 0;
-	grant_handle_t handle;
 	int ret;
 
-	for (i = 0; i < req->nr_pages; i++) {
-		if (!test_bit(i, req->unmap_seg))
+	for (i = 0; i < num; i++) {
+		if (pages[i]->persistent_gnt != NULL) {
+			put_persistent_gnt(blkif, pages[i]->persistent_gnt);
 			continue;
-		handle = pending_handle(req, i);
-		if (handle == BLKBACK_INVALID_HANDLE)
+		}
+		if (pages[i]->handle == BLKBACK_INVALID_HANDLE)
 			continue;
-		gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i),
-				    GNTMAP_host_map, handle);
-		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
-		pages[invcount] = virt_to_page(vaddr(req, i));
-		invcount++;
+		unmap_pages[invcount] = pages[i]->page;
+		gnttab_set_unmap_op(&unmap[invcount], vaddr(pages[i]->page),
+				    GNTMAP_host_map, pages[i]->handle);
+		pages[i]->handle = BLKBACK_INVALID_HANDLE;
+		if (++invcount == BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			ret = gnttab_unmap_refs(unmap, NULL, unmap_pages,
+			                        invcount);
+			BUG_ON(ret);
+			put_free_pages(blkif, unmap_pages, invcount);
+			invcount = 0;
+		}
 	}
-
-	ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
-	BUG_ON(ret);
+	if (invcount) {
+		ret = gnttab_unmap_refs(unmap, NULL, unmap_pages, invcount);
+		BUG_ON(ret);
+		put_free_pages(blkif, unmap_pages, invcount);
+	}
 }
 
-static int xen_blkbk_map(struct blkif_request *req,
-			 struct pending_req *pending_req,
-			 struct seg_buf seg[],
-			 struct page *pages[])
+static int xen_blkbk_map(struct xen_blkif *blkif,
+			 struct grant_page *pages[],
+			 int num, bool ro)
 {
 	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-	struct persistent_gnt *persistent_gnts[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct page *pages_to_gnt[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	struct persistent_gnt *persistent_gnt = NULL;
-	struct xen_blkif *blkif = pending_req->blkif;
 	phys_addr_t addr = 0;
-	int i, j;
-	bool new_map;
-	int nseg = req->u.rw.nr_segments;
+	int i, seg_idx, new_map_idx;
 	int segs_to_map = 0;
 	int ret = 0;
+	int last_map = 0, map_until = 0;
 	int use_persistent_gnts;
 
 	use_persistent_gnts = (blkif->vbd.feature_gnt_persistent);
 
-	BUG_ON(blkif->persistent_gnt_c >
-		   max_mapped_grant_pages(pending_req->blkif->blk_protocol));
-
 	/*
 	 * Fill out preq.nr_sects with proper amount of sectors, and setup
 	 * assign map[..] with the PFN of the page in our domain with the
 	 * corresponding grant reference for each page.
 	 */
-	for (i = 0; i < nseg; i++) {
+again:
+	for (i = map_until; i < num; i++) {
 		uint32_t flags;
 
 		if (use_persistent_gnts)
 			persistent_gnt = get_persistent_gnt(
-				&blkif->persistent_gnts,
-				req->u.rw.seg[i].gref);
+				blkif,
+				pages[i]->gref);
 
 		if (persistent_gnt) {
 			/*
 			 * We are using persistent grants and
 			 * the grant is already mapped
 			 */
-			new_map = false;
-		} else if (use_persistent_gnts &&
-			   blkif->persistent_gnt_c <
-			   max_mapped_grant_pages(blkif->blk_protocol)) {
-			/*
-			 * We are using persistent grants, the grant is
-			 * not mapped but we have room for it
-			 */
-			new_map = true;
-			persistent_gnt = kmalloc(
-				sizeof(struct persistent_gnt),
-				GFP_KERNEL);
-			if (!persistent_gnt)
-				return -ENOMEM;
-			if (alloc_xenballooned_pages(1, &persistent_gnt->page,
-			    false)) {
-				kfree(persistent_gnt);
-				return -ENOMEM;
-			}
-			persistent_gnt->gnt = req->u.rw.seg[i].gref;
-			persistent_gnt->handle = BLKBACK_INVALID_HANDLE;
-
-			pages_to_gnt[segs_to_map] =
-				persistent_gnt->page;
-			addr = (unsigned long) pfn_to_kaddr(
-				page_to_pfn(persistent_gnt->page));
-
-			add_persistent_gnt(&blkif->persistent_gnts,
-				persistent_gnt);
-			blkif->persistent_gnt_c++;
-			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
-				 persistent_gnt->gnt, blkif->persistent_gnt_c,
-				 max_mapped_grant_pages(blkif->blk_protocol));
+			pages[i]->page = persistent_gnt->page;
+			pages[i]->persistent_gnt = persistent_gnt;
 		} else {
-			/*
-			 * We are either using persistent grants and
-			 * hit the maximum limit of grants mapped,
-			 * or we are not using persistent grants.
-			 */
-			if (use_persistent_gnts &&
-				!blkif->vbd.overflow_max_grants) {
-				blkif->vbd.overflow_max_grants = 1;
-				pr_alert(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
-					 blkif->domid, blkif->vbd.handle);
-			}
-			new_map = true;
-			pages[i] = blkbk->pending_page(pending_req, i);
-			addr = vaddr(pending_req, i);
-			pages_to_gnt[segs_to_map] =
-				blkbk->pending_page(pending_req, i);
-		}
-
-		if (persistent_gnt) {
-			pages[i] = persistent_gnt->page;
-			persistent_gnts[i] = persistent_gnt;
-		} else {
-			persistent_gnts[i] = NULL;
-		}
-
-		if (new_map) {
+			if (get_free_page(blkif, &pages[i]->page))
+				goto out_of_memory;
+			addr = vaddr(pages[i]->page);
+			pages_to_gnt[segs_to_map] = pages[i]->page;
+			pages[i]->persistent_gnt = NULL;
 			flags = GNTMAP_host_map;
-			if (!persistent_gnt &&
-			    (pending_req->operation != BLKIF_OP_READ))
+			if (!use_persistent_gnts && ro)
 				flags |= GNTMAP_readonly;
 			gnttab_set_map_op(&map[segs_to_map++], addr,
-					  flags, req->u.rw.seg[i].gref,
+					  flags, pages[i]->gref,
 					  blkif->domid);
 		}
+		map_until = i + 1;
+		if (segs_to_map == BLKIF_MAX_SEGMENTS_PER_REQUEST)
+			break;
 	}
 
 	if (segs_to_map) {
@@ -595,49 +749,133 @@
 	 * so that when we access vaddr(pending_req,i) it has the contents of
 	 * the page from the other domain.
 	 */
-	bitmap_zero(pending_req->unmap_seg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	for (i = 0, j = 0; i < nseg; i++) {
-		if (!persistent_gnts[i] ||
-		    persistent_gnts[i]->handle == BLKBACK_INVALID_HANDLE) {
+	for (seg_idx = last_map, new_map_idx = 0; seg_idx < map_until; seg_idx++) {
+		if (!pages[seg_idx]->persistent_gnt) {
 			/* This is a newly mapped grant */
-			BUG_ON(j >= segs_to_map);
-			if (unlikely(map[j].status != 0)) {
+			BUG_ON(new_map_idx >= segs_to_map);
+			if (unlikely(map[new_map_idx].status != 0)) {
 				pr_debug(DRV_PFX "invalid buffer -- could not remap it\n");
-				map[j].handle = BLKBACK_INVALID_HANDLE;
+				pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE;
 				ret |= 1;
-				if (persistent_gnts[i]) {
-					rb_erase(&persistent_gnts[i]->node,
-						 &blkif->persistent_gnts);
-					blkif->persistent_gnt_c--;
-					kfree(persistent_gnts[i]);
-					persistent_gnts[i] = NULL;
-				}
+				goto next;
 			}
-		}
-		if (persistent_gnts[i]) {
-			if (persistent_gnts[i]->handle ==
-			    BLKBACK_INVALID_HANDLE) {
-				/*
-				 * If this is a new persistent grant
-				 * save the handler
-				 */
-				persistent_gnts[i]->handle = map[j++].handle;
-			}
-			pending_handle(pending_req, i) =
-				persistent_gnts[i]->handle;
-
-			if (ret)
-				continue;
+			pages[seg_idx]->handle = map[new_map_idx].handle;
 		} else {
-			pending_handle(pending_req, i) = map[j++].handle;
-			bitmap_set(pending_req->unmap_seg, i, 1);
-
-			if (ret)
-				continue;
+			continue;
 		}
-		seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+		if (use_persistent_gnts &&
+		    blkif->persistent_gnt_c < xen_blkif_max_pgrants) {
+			/*
+			 * We are using persistent grants, the grant is
+			 * not mapped but we might have room for it.
+			 */
+			persistent_gnt = kmalloc(sizeof(struct persistent_gnt),
+				                 GFP_KERNEL);
+			if (!persistent_gnt) {
+				/*
+				 * If we don't have enough memory to
+				 * allocate the persistent_gnt struct
+				 * map this grant non-persistenly
+				 */
+				goto next;
+			}
+			persistent_gnt->gnt = map[new_map_idx].ref;
+			persistent_gnt->handle = map[new_map_idx].handle;
+			persistent_gnt->page = pages[seg_idx]->page;
+			if (add_persistent_gnt(blkif,
+			                       persistent_gnt)) {
+				kfree(persistent_gnt);
+				persistent_gnt = NULL;
+				goto next;
+			}
+			pages[seg_idx]->persistent_gnt = persistent_gnt;
+			pr_debug(DRV_PFX " grant %u added to the tree of persistent grants, using %u/%u\n",
+				 persistent_gnt->gnt, blkif->persistent_gnt_c,
+				 xen_blkif_max_pgrants);
+			goto next;
+		}
+		if (use_persistent_gnts && !blkif->vbd.overflow_max_grants) {
+			blkif->vbd.overflow_max_grants = 1;
+			pr_debug(DRV_PFX " domain %u, device %#x is using maximum number of persistent grants\n",
+			         blkif->domid, blkif->vbd.handle);
+		}
+		/*
+		 * We could not map this grant persistently, so use it as
+		 * a non-persistent grant.
+		 */
+next:
+		new_map_idx++;
 	}
+	segs_to_map = 0;
+	last_map = map_until;
+	if (map_until != num)
+		goto again;
+
 	return ret;
+
+out_of_memory:
+	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
+	put_free_pages(blkif, pages_to_gnt, segs_to_map);
+	return -ENOMEM;
+}
+
+static int xen_blkbk_map_seg(struct pending_req *pending_req)
+{
+	int rc;
+
+	rc = xen_blkbk_map(pending_req->blkif, pending_req->segments,
+			   pending_req->nr_pages,
+	                   (pending_req->operation != BLKIF_OP_READ));
+
+	return rc;
+}
+
+static int xen_blkbk_parse_indirect(struct blkif_request *req,
+				    struct pending_req *pending_req,
+				    struct seg_buf seg[],
+				    struct phys_req *preq)
+{
+	struct grant_page **pages = pending_req->indirect_pages;
+	struct xen_blkif *blkif = pending_req->blkif;
+	int indirect_grefs, rc, n, nseg, i;
+	struct blkif_request_segment_aligned *segments = NULL;
+
+	nseg = pending_req->nr_pages;
+	indirect_grefs = INDIRECT_PAGES(nseg);
+	BUG_ON(indirect_grefs > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
+
+	for (i = 0; i < indirect_grefs; i++)
+		pages[i]->gref = req->u.indirect.indirect_grefs[i];
+
+	rc = xen_blkbk_map(blkif, pages, indirect_grefs, true);
+	if (rc)
+		goto unmap;
+
+	for (n = 0, i = 0; n < nseg; n++) {
+		if ((n % SEGS_PER_INDIRECT_FRAME) == 0) {
+			/* Map indirect segments */
+			if (segments)
+				kunmap_atomic(segments);
+			segments = kmap_atomic(pages[n/SEGS_PER_INDIRECT_FRAME]->page);
+		}
+		i = n % SEGS_PER_INDIRECT_FRAME;
+		pending_req->segments[n]->gref = segments[i].gref;
+		seg[n].nsec = segments[i].last_sect -
+			segments[i].first_sect + 1;
+		seg[n].offset = (segments[i].first_sect << 9);
+		if ((segments[i].last_sect >= (PAGE_SIZE >> 9)) ||
+		    (segments[i].last_sect < segments[i].first_sect)) {
+			rc = -EINVAL;
+			goto unmap;
+		}
+		preq->nr_sects += seg[n].nsec;
+	}
+
+unmap:
+	if (segments)
+		kunmap_atomic(segments);
+	xen_blkbk_unmap(blkif, pages, indirect_grefs);
+	return rc;
 }
 
 static int dispatch_discard_io(struct xen_blkif *blkif,
@@ -647,7 +885,18 @@
 	int status = BLKIF_RSP_OKAY;
 	struct block_device *bdev = blkif->vbd.bdev;
 	unsigned long secure;
+	struct phys_req preq;
 
+	preq.sector_number = req->u.discard.sector_number;
+	preq.nr_sects      = req->u.discard.nr_sectors;
+
+	err = xen_vbd_translate(&preq, blkif, WRITE);
+	if (err) {
+		pr_warn(DRV_PFX "access denied: DISCARD [%llu->%llu] on dev=%04x\n",
+			preq.sector_number,
+			preq.sector_number + preq.nr_sects, blkif->vbd.pdevice);
+		goto fail_response;
+	}
 	blkif->st_ds_req++;
 
 	xen_blkif_get(blkif);
@@ -658,7 +907,7 @@
 	err = blkdev_issue_discard(bdev, req->u.discard.sector_number,
 				   req->u.discard.nr_sectors,
 				   GFP_KERNEL, secure);
-
+fail_response:
 	if (err == -EOPNOTSUPP) {
 		pr_debug(DRV_PFX "discard op failed, not supported\n");
 		status = BLKIF_RSP_EOPNOTSUPP;
@@ -674,7 +923,7 @@
 			     struct blkif_request *req,
 			     struct pending_req *pending_req)
 {
-	free_req(pending_req);
+	free_req(blkif, pending_req);
 	make_response(blkif, req->u.other.id, req->operation,
 		      BLKIF_RSP_EOPNOTSUPP);
 	return -EIO;
@@ -726,7 +975,9 @@
 	 * the proper response on the ring.
 	 */
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		xen_blkbk_unmap(pending_req);
+		xen_blkbk_unmap(pending_req->blkif,
+		                pending_req->segments,
+		                pending_req->nr_pages);
 		make_response(pending_req->blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
 		xen_blkif_put(pending_req->blkif);
@@ -734,7 +985,7 @@
 			if (atomic_read(&pending_req->blkif->drain))
 				complete(&pending_req->blkif->drain_complete);
 		}
-		free_req(pending_req);
+		free_req(pending_req->blkif, pending_req);
 	}
 }
 
@@ -767,6 +1018,12 @@
 	rp = blk_rings->common.sring->req_prod;
 	rmb(); /* Ensure we see queued requests up to 'rp'. */
 
+	if (RING_REQUEST_PROD_OVERFLOW(&blk_rings->common, rp)) {
+		rc = blk_rings->common.rsp_prod_pvt;
+		pr_warn(DRV_PFX "Frontend provided bogus ring requests (%d - %d = %d). Halting ring processing on dev=%04x\n",
+			rp, rc, rp - rc, blkif->vbd.pdevice);
+		return -EACCES;
+	}
 	while (rc != rp) {
 
 		if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc))
@@ -777,7 +1034,7 @@
 			break;
 		}
 
-		pending_req = alloc_req();
+		pending_req = alloc_req(blkif);
 		if (NULL == pending_req) {
 			blkif->st_oo_req++;
 			more_to_do = 1;
@@ -807,11 +1064,12 @@
 		case BLKIF_OP_WRITE:
 		case BLKIF_OP_WRITE_BARRIER:
 		case BLKIF_OP_FLUSH_DISKCACHE:
+		case BLKIF_OP_INDIRECT:
 			if (dispatch_rw_block_io(blkif, &req, pending_req))
 				goto done;
 			break;
 		case BLKIF_OP_DISCARD:
-			free_req(pending_req);
+			free_req(blkif, pending_req);
 			if (dispatch_discard_io(blkif, &req))
 				goto done;
 			break;
@@ -853,17 +1111,28 @@
 				struct pending_req *pending_req)
 {
 	struct phys_req preq;
-	struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct seg_buf *seg = pending_req->seg;
 	unsigned int nseg;
 	struct bio *bio = NULL;
-	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct bio **biolist = pending_req->biolist;
 	int i, nbio = 0;
 	int operation;
 	struct blk_plug plug;
 	bool drain = false;
-	struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant_page **pages = pending_req->segments;
+	unsigned short req_operation;
 
-	switch (req->operation) {
+	req_operation = req->operation == BLKIF_OP_INDIRECT ?
+			req->u.indirect.indirect_op : req->operation;
+	if ((req->operation == BLKIF_OP_INDIRECT) &&
+	    (req_operation != BLKIF_OP_READ) &&
+	    (req_operation != BLKIF_OP_WRITE)) {
+		pr_debug(DRV_PFX "Invalid indirect operation (%u)\n",
+			 req_operation);
+		goto fail_response;
+	}
+
+	switch (req_operation) {
 	case BLKIF_OP_READ:
 		blkif->st_rd_req++;
 		operation = READ;
@@ -885,33 +1154,47 @@
 	}
 
 	/* Check that the number of segments is sane. */
-	nseg = req->u.rw.nr_segments;
+	nseg = req->operation == BLKIF_OP_INDIRECT ?
+	       req->u.indirect.nr_segments : req->u.rw.nr_segments;
 
 	if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
-	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+	    unlikely((req->operation != BLKIF_OP_INDIRECT) &&
+		     (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) ||
+	    unlikely((req->operation == BLKIF_OP_INDIRECT) &&
+		     (nseg > MAX_INDIRECT_SEGMENTS))) {
 		pr_debug(DRV_PFX "Bad number of segments in request (%d)\n",
 			 nseg);
 		/* Haven't submitted any bio's yet. */
 		goto fail_response;
 	}
 
-	preq.sector_number = req->u.rw.sector_number;
 	preq.nr_sects      = 0;
 
 	pending_req->blkif     = blkif;
 	pending_req->id        = req->u.rw.id;
-	pending_req->operation = req->operation;
+	pending_req->operation = req_operation;
 	pending_req->status    = BLKIF_RSP_OKAY;
 	pending_req->nr_pages  = nseg;
 
-	for (i = 0; i < nseg; i++) {
-		seg[i].nsec = req->u.rw.seg[i].last_sect -
-			req->u.rw.seg[i].first_sect + 1;
-		if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
-		    (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect))
+	if (req->operation != BLKIF_OP_INDIRECT) {
+		preq.dev               = req->u.rw.handle;
+		preq.sector_number     = req->u.rw.sector_number;
+		for (i = 0; i < nseg; i++) {
+			pages[i]->gref = req->u.rw.seg[i].gref;
+			seg[i].nsec = req->u.rw.seg[i].last_sect -
+				req->u.rw.seg[i].first_sect + 1;
+			seg[i].offset = (req->u.rw.seg[i].first_sect << 9);
+			if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+			    (req->u.rw.seg[i].last_sect <
+			     req->u.rw.seg[i].first_sect))
+				goto fail_response;
+			preq.nr_sects += seg[i].nsec;
+		}
+	} else {
+		preq.dev               = req->u.indirect.handle;
+		preq.sector_number     = req->u.indirect.sector_number;
+		if (xen_blkbk_parse_indirect(req, pending_req, seg, &preq))
 			goto fail_response;
-		preq.nr_sects += seg[i].nsec;
-
 	}
 
 	if (xen_vbd_translate(&preq, blkif, operation) != 0) {
@@ -948,7 +1231,7 @@
 	 * the hypercall to unmap the grants - that is all done in
 	 * xen_blkbk_unmap.
 	 */
-	if (xen_blkbk_map(req, pending_req, seg, pages))
+	if (xen_blkbk_map_seg(pending_req))
 		goto fail_flush;
 
 	/*
@@ -960,11 +1243,12 @@
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
 		       (bio_add_page(bio,
-				     pages[i],
+				     pages[i]->page,
 				     seg[i].nsec << 9,
 				     seg[i].offset) == 0)) {
 
-			bio = bio_alloc(GFP_KERNEL, nseg-i);
+			int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
+			bio = bio_alloc(GFP_KERNEL, nr_iovecs);
 			if (unlikely(bio == NULL))
 				goto fail_put_bio;
 
@@ -1009,11 +1293,12 @@
 	return 0;
 
  fail_flush:
-	xen_blkbk_unmap(pending_req);
+	xen_blkbk_unmap(blkif, pending_req->segments,
+	                pending_req->nr_pages);
  fail_response:
 	/* Haven't submitted any bio's yet. */
-	make_response(blkif, req->u.rw.id, req->operation, BLKIF_RSP_ERROR);
-	free_req(pending_req);
+	make_response(blkif, req->u.rw.id, req_operation, BLKIF_RSP_ERROR);
+	free_req(blkif, pending_req);
 	msleep(1); /* back off a bit */
 	return -EIO;
 
@@ -1070,73 +1355,20 @@
 
 static int __init xen_blkif_init(void)
 {
-	int i, mmap_pages;
 	int rc = 0;
 
 	if (!xen_domain())
 		return -ENODEV;
 
-	blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL);
-	if (!blkbk) {
-		pr_alert(DRV_PFX "%s: out of memory!\n", __func__);
-		return -ENOMEM;
-	}
-
-	mmap_pages = xen_blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
-
-	blkbk->pending_reqs          = kzalloc(sizeof(blkbk->pending_reqs[0]) *
-					xen_blkif_reqs, GFP_KERNEL);
-	blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) *
-					mmap_pages, GFP_KERNEL);
-	blkbk->pending_pages         = kzalloc(sizeof(blkbk->pending_pages[0]) *
-					mmap_pages, GFP_KERNEL);
-
-	if (!blkbk->pending_reqs || !blkbk->pending_grant_handles ||
-	    !blkbk->pending_pages) {
-		rc = -ENOMEM;
-		goto out_of_memory;
-	}
-
-	for (i = 0; i < mmap_pages; i++) {
-		blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
-		blkbk->pending_pages[i] = alloc_page(GFP_KERNEL);
-		if (blkbk->pending_pages[i] == NULL) {
-			rc = -ENOMEM;
-			goto out_of_memory;
-		}
-	}
 	rc = xen_blkif_interface_init();
 	if (rc)
 		goto failed_init;
 
-	INIT_LIST_HEAD(&blkbk->pending_free);
-	spin_lock_init(&blkbk->pending_free_lock);
-	init_waitqueue_head(&blkbk->pending_free_wq);
-
-	for (i = 0; i < xen_blkif_reqs; i++)
-		list_add_tail(&blkbk->pending_reqs[i].free_list,
-			      &blkbk->pending_free);
-
 	rc = xen_blkif_xenbus_init();
 	if (rc)
 		goto failed_init;
 
-	return 0;
-
- out_of_memory:
-	pr_alert(DRV_PFX "%s: out of memory\n", __func__);
  failed_init:
-	kfree(blkbk->pending_reqs);
-	kfree(blkbk->pending_grant_handles);
-	if (blkbk->pending_pages) {
-		for (i = 0; i < mmap_pages; i++) {
-			if (blkbk->pending_pages[i])
-				__free_page(blkbk->pending_pages[i]);
-		}
-		kfree(blkbk->pending_pages);
-	}
-	kfree(blkbk);
-	blkbk = NULL;
 	return rc;
 }
 
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 60103e2..8d88075 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -50,6 +50,19 @@
 		 __func__, __LINE__, ##args)
 
 
+/*
+ * This is the maximum number of segments that would be allowed in indirect
+ * requests. This value will also be passed to the frontend.
+ */
+#define MAX_INDIRECT_SEGMENTS 256
+
+#define SEGS_PER_INDIRECT_FRAME \
+	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define MAX_INDIRECT_PAGES \
+	((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) \
+	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
 /* Not a real protocol.  Used to generate ring structs which contain
  * the elements common to all protocols only.  This way we get a
  * compiler-checkable way to use common struct elements, so we can
@@ -83,12 +96,31 @@
 	uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_x86_32_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad1;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	/*
+	 * The maximum number of indirect segments (and pages) that will
+	 * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+	 * is also exported to the guest (via xenstore
+	 * feature-max-indirect-segments entry), so the frontend knows how
+	 * many indirect segments the backend supports.
+	 */
+	uint64_t       _pad2;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_32_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_x86_32_request_rw rw;
 		struct blkif_x86_32_request_discard discard;
 		struct blkif_x86_32_request_other other;
+		struct blkif_x86_32_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
@@ -127,12 +159,32 @@
 	uint64_t       id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_x86_64_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+	uint32_t       _pad1;        /* offsetof(blkif_..,u.indirect.id)==8   */
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+	/*
+	 * The maximum number of indirect segments (and pages) that will
+	 * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+	 * is also exported to the guest (via xenstore
+	 * feature-max-indirect-segments entry), so the frontend knows how
+	 * many indirect segments the backend supports.
+	 */
+	uint32_t       _pad3;        /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
 struct blkif_x86_64_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_x86_64_request_rw rw;
 		struct blkif_x86_64_request_discard discard;
 		struct blkif_x86_64_request_other other;
+		struct blkif_x86_64_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
@@ -182,12 +234,26 @@
 
 struct backend_info;
 
+/* Number of available flags */
+#define PERSISTENT_GNT_FLAGS_SIZE	2
+/* This persistent grant is currently in use */
+#define PERSISTENT_GNT_ACTIVE		0
+/*
+ * This persistent grant has been used, this flag is set when we remove the
+ * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
+ */
+#define PERSISTENT_GNT_WAS_ACTIVE	1
+
+/* Number of requests that we can fit in a ring */
+#define XEN_BLKIF_REQS			32
 
 struct persistent_gnt {
 	struct page *page;
 	grant_ref_t gnt;
 	grant_handle_t handle;
+	DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
 	struct rb_node node;
+	struct list_head remove_node;
 };
 
 struct xen_blkif {
@@ -219,6 +285,23 @@
 	/* tree to store persistent grants */
 	struct rb_root		persistent_gnts;
 	unsigned int		persistent_gnt_c;
+	atomic_t		persistent_gnt_in_use;
+	unsigned long           next_lru;
+
+	/* used by the kworker that offload work from the persistent purge */
+	struct list_head	persistent_purge_list;
+	struct work_struct	persistent_purge_work;
+
+	/* buffer of free pages to map grant refs */
+	spinlock_t		free_pages_lock;
+	int			free_pages_num;
+	struct list_head	free_pages;
+
+	/* List of all 'pending_req' available */
+	struct list_head	pending_free;
+	/* And its spinlock. */
+	spinlock_t		pending_free_lock;
+	wait_queue_head_t	pending_free_wq;
 
 	/* statistics */
 	unsigned long		st_print;
@@ -231,6 +314,41 @@
 	unsigned long long			st_wr_sect;
 
 	wait_queue_head_t	waiting_to_free;
+	/* Thread shutdown wait queue. */
+	wait_queue_head_t	shutdown_wq;
+};
+
+struct seg_buf {
+	unsigned long offset;
+	unsigned int nsec;
+};
+
+struct grant_page {
+	struct page 		*page;
+	struct persistent_gnt	*persistent_gnt;
+	grant_handle_t		handle;
+	grant_ref_t		gref;
+};
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+	struct xen_blkif	*blkif;
+	u64			id;
+	int			nr_pages;
+	atomic_t		pendcnt;
+	unsigned short		operation;
+	int			status;
+	struct list_head	free_list;
+	struct grant_page	*segments[MAX_INDIRECT_SEGMENTS];
+	/* Indirect descriptors */
+	struct grant_page	*indirect_pages[MAX_INDIRECT_PAGES];
+	struct seg_buf		seg[MAX_INDIRECT_SEGMENTS];
+	struct bio		*biolist[MAX_INDIRECT_SEGMENTS];
 };
 
 
@@ -257,6 +375,7 @@
 
 irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
 int xen_blkif_schedule(void *arg);
+int xen_blkif_purge_persistent(void *arg);
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
 			      struct backend_info *be, int state);
@@ -268,7 +387,7 @@
 static inline void blkif_get_x86_32_req(struct blkif_request *dst,
 					struct blkif_x86_32_request *src)
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
 	dst->operation = src->operation;
 	switch (src->operation) {
 	case BLKIF_OP_READ:
@@ -291,6 +410,18 @@
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		break;
+	case BLKIF_OP_INDIRECT:
+		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+		dst->u.indirect.handle = src->u.indirect.handle;
+		dst->u.indirect.id = src->u.indirect.id;
+		dst->u.indirect.sector_number = src->u.indirect.sector_number;
+		barrier();
+		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+		for (i = 0; i < j; i++)
+			dst->u.indirect.indirect_grefs[i] =
+				src->u.indirect.indirect_grefs[i];
+		break;
 	default:
 		/*
 		 * Don't know how to translate this op. Only get the
@@ -304,7 +435,7 @@
 static inline void blkif_get_x86_64_req(struct blkif_request *dst,
 					struct blkif_x86_64_request *src)
 {
-	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
 	dst->operation = src->operation;
 	switch (src->operation) {
 	case BLKIF_OP_READ:
@@ -327,6 +458,18 @@
 		dst->u.discard.sector_number = src->u.discard.sector_number;
 		dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
 		break;
+	case BLKIF_OP_INDIRECT:
+		dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+		dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+		dst->u.indirect.handle = src->u.indirect.handle;
+		dst->u.indirect.id = src->u.indirect.id;
+		dst->u.indirect.sector_number = src->u.indirect.sector_number;
+		barrier();
+		j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+		for (i = 0; i < j; i++)
+			dst->u.indirect.indirect_grefs[i] =
+				src->u.indirect.indirect_grefs[i];
+		break;
 	default:
 		/*
 		 * Don't know how to translate this op. Only get the
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 04608a6..fe5c3cd 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -98,12 +98,17 @@
 		err = PTR_ERR(blkif->xenblkd);
 		blkif->xenblkd = NULL;
 		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
+		return;
 	}
 }
 
 static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 {
 	struct xen_blkif *blkif;
+	struct pending_req *req, *n;
+	int i, j;
+
+	BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
 
 	blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
 	if (!blkif)
@@ -118,8 +123,57 @@
 	blkif->st_print = jiffies;
 	init_waitqueue_head(&blkif->waiting_to_free);
 	blkif->persistent_gnts.rb_node = NULL;
+	spin_lock_init(&blkif->free_pages_lock);
+	INIT_LIST_HEAD(&blkif->free_pages);
+	blkif->free_pages_num = 0;
+	atomic_set(&blkif->persistent_gnt_in_use, 0);
+
+	INIT_LIST_HEAD(&blkif->pending_free);
+
+	for (i = 0; i < XEN_BLKIF_REQS; i++) {
+		req = kzalloc(sizeof(*req), GFP_KERNEL);
+		if (!req)
+			goto fail;
+		list_add_tail(&req->free_list,
+		              &blkif->pending_free);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			req->segments[j] = kzalloc(sizeof(*req->segments[0]),
+			                           GFP_KERNEL);
+			if (!req->segments[j])
+				goto fail;
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
+			                                 GFP_KERNEL);
+			if (!req->indirect_pages[j])
+				goto fail;
+		}
+	}
+	spin_lock_init(&blkif->pending_free_lock);
+	init_waitqueue_head(&blkif->pending_free_wq);
+	init_waitqueue_head(&blkif->shutdown_wq);
 
 	return blkif;
+
+fail:
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
+			if (!req->segments[j])
+				break;
+			kfree(req->segments[j]);
+		}
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
+			if (!req->indirect_pages[j])
+				break;
+			kfree(req->indirect_pages[j]);
+		}
+		kfree(req);
+	}
+
+	kmem_cache_free(xen_blkif_cachep, blkif);
+
+	return ERR_PTR(-ENOMEM);
 }
 
 static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page,
@@ -178,6 +232,7 @@
 {
 	if (blkif->xenblkd) {
 		kthread_stop(blkif->xenblkd);
+		wake_up(&blkif->shutdown_wq);
 		blkif->xenblkd = NULL;
 	}
 
@@ -198,8 +253,28 @@
 
 static void xen_blkif_free(struct xen_blkif *blkif)
 {
+	struct pending_req *req, *n;
+	int i = 0, j;
+
 	if (!atomic_dec_and_test(&blkif->refcnt))
 		BUG();
+
+	/* Check that there is no request in use */
+	list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) {
+		list_del(&req->free_list);
+
+		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
+			kfree(req->segments[j]);
+
+		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
+			kfree(req->indirect_pages[j]);
+
+		kfree(req);
+		i++;
+	}
+
+	WARN_ON(i != XEN_BLKIF_REQS);
+
 	kmem_cache_free(xen_blkif_cachep, blkif);
 }
 
@@ -678,6 +753,11 @@
 				 dev->nodename);
 		goto abort;
 	}
+	err = xenbus_printf(xbt, dev->nodename, "feature-max-indirect-segments", "%u",
+			    MAX_INDIRECT_SEGMENTS);
+	if (err)
+		dev_warn(&dev->dev, "writing %s/feature-max-indirect-segments (%d)",
+			 dev->nodename, err);
 
 	err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
 			    (unsigned long long)vbd_sz(&be->blkif->vbd));
@@ -704,6 +784,11 @@
 				 dev->nodename);
 		goto abort;
 	}
+	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
+			    bdev_physical_block_size(be->blkif->vbd.bdev));
+	if (err)
+		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
+				 dev->nodename);
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err == -EAGAIN)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index d89ef86..a4660bb 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -74,12 +74,30 @@
 struct blk_shadow {
 	struct blkif_request req;
 	struct request *request;
-	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant **grants_used;
+	struct grant **indirect_grants;
+	struct scatterlist *sg;
+};
+
+struct split_bio {
+	struct bio *bio;
+	atomic_t pending;
+	int err;
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
 static const struct block_device_operations xlvbd_block_fops;
 
+/*
+ * Maximum number of segments in indirect requests, the actual value used by
+ * the frontend driver is the minimum of this value and the value provided
+ * by the backend driver.
+ */
+
+static unsigned int xen_blkif_max_segments = 32;
+module_param_named(max, xen_blkif_max_segments, int, S_IRUGO);
+MODULE_PARM_DESC(max, "Maximum amount of segments in indirect requests (default is 32)");
+
 #define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
 
 /*
@@ -98,7 +116,6 @@
 	enum blkif_state connected;
 	int ring_ref;
 	struct blkif_front_ring ring;
-	struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 	unsigned int evtchn, irq;
 	struct request_queue *rq;
 	struct work_struct work;
@@ -114,6 +131,7 @@
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
 	unsigned int feature_persistent:1;
+	unsigned int max_indirect_segments;
 	int is_ready;
 };
 
@@ -142,6 +160,13 @@
 
 #define DEV_NAME	"xvd"	/* name in /dev */
 
+#define SEGS_PER_INDIRECT_FRAME \
+	(PAGE_SIZE/sizeof(struct blkif_request_segment_aligned))
+#define INDIRECT_GREFS(_segs) \
+	((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
+static int blkfront_setup_indirect(struct blkfront_info *info);
+
 static int get_id_from_freelist(struct blkfront_info *info)
 {
 	unsigned long free = info->shadow_free;
@@ -358,7 +383,8 @@
 	struct blkif_request *ring_req;
 	unsigned long id;
 	unsigned int fsect, lsect;
-	int i, ref;
+	int i, ref, n;
+	struct blkif_request_segment_aligned *segments = NULL;
 
 	/*
 	 * Used to store if we are able to queue the request by just using
@@ -369,21 +395,27 @@
 	grant_ref_t gref_head;
 	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
+	int nseg, max_grefs;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	/* Check if we have enought grants to allocate a requests */
-	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+	max_grefs = info->max_indirect_segments ?
+		    info->max_indirect_segments +
+		    INDIRECT_GREFS(info->max_indirect_segments) :
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+	/* Check if we have enough grants to allocate a requests */
+	if (info->persistent_gnts_c < max_grefs) {
 		new_persistent_gnts = 1;
 		if (gnttab_alloc_grant_references(
-		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    max_grefs - info->persistent_gnts_c,
 		    &gref_head) < 0) {
 			gnttab_request_free_callback(
 				&info->callback,
 				blkif_restart_queue_callback,
 				info,
-				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+				max_grefs);
 			return 1;
 		}
 	} else
@@ -394,42 +426,67 @@
 	id = get_id_from_freelist(info);
 	info->shadow[id].request = req;
 
-	ring_req->u.rw.id = id;
-	ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
-	ring_req->u.rw.handle = info->handle;
-
-	ring_req->operation = rq_data_dir(req) ?
-		BLKIF_OP_WRITE : BLKIF_OP_READ;
-
-	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
-		/*
-		 * Ideally we can do an unordered flush-to-disk. In case the
-		 * backend onlysupports barriers, use that. A barrier request
-		 * a superset of FUA, so we can implement it the same
-		 * way.  (It's also a FLUSH+FUA, since it is
-		 * guaranteed ordered WRT previous writes.)
-		 */
-		ring_req->operation = info->flush_op;
-	}
-
 	if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE))) {
-		/* id, sector_number and handle are set above. */
 		ring_req->operation = BLKIF_OP_DISCARD;
 		ring_req->u.discard.nr_sectors = blk_rq_sectors(req);
+		ring_req->u.discard.id = id;
+		ring_req->u.discard.sector_number = (blkif_sector_t)blk_rq_pos(req);
 		if ((req->cmd_flags & REQ_SECURE) && info->feature_secdiscard)
 			ring_req->u.discard.flag = BLKIF_DISCARD_SECURE;
 		else
 			ring_req->u.discard.flag = 0;
 	} else {
-		ring_req->u.rw.nr_segments = blk_rq_map_sg(req->q, req,
-							   info->sg);
-		BUG_ON(ring_req->u.rw.nr_segments >
-		       BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-		for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
+		BUG_ON(info->max_indirect_segments == 0 &&
+		       req->nr_phys_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
+		BUG_ON(info->max_indirect_segments &&
+		       req->nr_phys_segments > info->max_indirect_segments);
+		nseg = blk_rq_map_sg(req->q, req, info->shadow[id].sg);
+		ring_req->u.rw.id = id;
+		if (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+			/*
+			 * The indirect operation can only be a BLKIF_OP_READ or
+			 * BLKIF_OP_WRITE
+			 */
+			BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
+			ring_req->operation = BLKIF_OP_INDIRECT;
+			ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
+				BLKIF_OP_WRITE : BLKIF_OP_READ;
+			ring_req->u.indirect.sector_number = (blkif_sector_t)blk_rq_pos(req);
+			ring_req->u.indirect.handle = info->handle;
+			ring_req->u.indirect.nr_segments = nseg;
+		} else {
+			ring_req->u.rw.sector_number = (blkif_sector_t)blk_rq_pos(req);
+			ring_req->u.rw.handle = info->handle;
+			ring_req->operation = rq_data_dir(req) ?
+				BLKIF_OP_WRITE : BLKIF_OP_READ;
+			if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+				/*
+				 * Ideally we can do an unordered flush-to-disk. In case the
+				 * backend onlysupports barriers, use that. A barrier request
+				 * a superset of FUA, so we can implement it the same
+				 * way.  (It's also a FLUSH+FUA, since it is
+				 * guaranteed ordered WRT previous writes.)
+				 */
+				ring_req->operation = info->flush_op;
+			}
+			ring_req->u.rw.nr_segments = nseg;
+		}
+		for_each_sg(info->shadow[id].sg, sg, nseg, i) {
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
 
+			if ((ring_req->operation == BLKIF_OP_INDIRECT) &&
+			    (i % SEGS_PER_INDIRECT_FRAME == 0)) {
+				if (segments)
+					kunmap_atomic(segments);
+
+				n = i / SEGS_PER_INDIRECT_FRAME;
+				gnt_list_entry = get_grant(&gref_head, info);
+				info->shadow[id].indirect_grants[n] = gnt_list_entry;
+				segments = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
+				ring_req->u.indirect.indirect_grefs[n] = gnt_list_entry->gref;
+			}
+
 			gnt_list_entry = get_grant(&gref_head, info);
 			ref = gnt_list_entry->gref;
 
@@ -441,8 +498,7 @@
 
 				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 
-				shared_data = kmap_atomic(
-					pfn_to_page(gnt_list_entry->pfn));
+				shared_data = kmap_atomic(pfn_to_page(gnt_list_entry->pfn));
 				bvec_data = kmap_atomic(sg_page(sg));
 
 				/*
@@ -461,13 +517,23 @@
 				kunmap_atomic(bvec_data);
 				kunmap_atomic(shared_data);
 			}
-
-			ring_req->u.rw.seg[i] =
-					(struct blkif_request_segment) {
-						.gref       = ref,
-						.first_sect = fsect,
-						.last_sect  = lsect };
+			if (ring_req->operation != BLKIF_OP_INDIRECT) {
+				ring_req->u.rw.seg[i] =
+						(struct blkif_request_segment) {
+							.gref       = ref,
+							.first_sect = fsect,
+							.last_sect  = lsect };
+			} else {
+				n = i % SEGS_PER_INDIRECT_FRAME;
+				segments[n] =
+					(struct blkif_request_segment_aligned) {
+							.gref       = ref,
+							.first_sect = fsect,
+							.last_sect  = lsect };
+			}
 		}
+		if (segments)
+			kunmap_atomic(segments);
 	}
 
 	info->ring.req_prod_pvt++;
@@ -542,7 +608,9 @@
 		flush_requests(info);
 }
 
-static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+				unsigned int physical_sector_size,
+				unsigned int segments)
 {
 	struct request_queue *rq;
 	struct blkfront_info *info = gd->private_data;
@@ -564,14 +632,15 @@
 
 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
-	blk_queue_max_hw_sectors(rq, 512);
+	blk_queue_physical_block_size(rq, physical_sector_size);
+	blk_queue_max_hw_sectors(rq, (segments * PAGE_SIZE) / 512);
 
 	/* Each segment in a request is up to an aligned page in size. */
 	blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
 	blk_queue_max_segment_size(rq, PAGE_SIZE);
 
 	/* Ensure a merged request will fit in a single I/O ring slot. */
-	blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+	blk_queue_max_segments(rq, segments);
 
 	/* Make sure buffer addresses are sector-aligned. */
 	blk_queue_dma_alignment(rq, 511);
@@ -588,13 +657,16 @@
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s %s %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled",
-	       info->feature_persistent ? "using persistent grants" : "");
+	       info->feature_flush ? "enabled;" : "disabled;",
+	       "persistent grants:",
+	       info->feature_persistent ? "enabled;" : "disabled;",
+	       "indirect descriptors:",
+	       info->max_indirect_segments ? "enabled;" : "disabled;");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -667,7 +739,8 @@
 
 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 			       struct blkfront_info *info,
-			       u16 vdisk_info, u16 sector_size)
+			       u16 vdisk_info, u16 sector_size,
+			       unsigned int physical_sector_size)
 {
 	struct gendisk *gd;
 	int nr_minors = 1;
@@ -734,7 +807,9 @@
 	gd->driverfs_dev = &(info->xbdev->dev);
 	set_capacity(gd, capacity);
 
-	if (xlvbd_init_blk_queue(gd, sector_size)) {
+	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
+				 info->max_indirect_segments ? :
+				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		del_gendisk(gd);
 		goto release;
 	}
@@ -818,6 +893,7 @@
 {
 	struct grant *persistent_gnt;
 	struct grant *n;
+	int i, j, segs;
 
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
@@ -843,6 +919,47 @@
 	}
 	BUG_ON(info->persistent_gnts_c != 0);
 
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		/*
+		 * Clear persistent grants present in requests already
+		 * on the shared ring
+		 */
+		if (!info->shadow[i].request)
+			goto free_shadow;
+
+		segs = info->shadow[i].req.operation == BLKIF_OP_INDIRECT ?
+		       info->shadow[i].req.u.indirect.nr_segments :
+		       info->shadow[i].req.u.rw.nr_segments;
+		for (j = 0; j < segs; j++) {
+			persistent_gnt = info->shadow[i].grants_used[j];
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+
+		if (info->shadow[i].req.operation != BLKIF_OP_INDIRECT)
+			/*
+			 * If this is not an indirect operation don't try to
+			 * free indirect segments
+			 */
+			goto free_shadow;
+
+		for (j = 0; j < INDIRECT_GREFS(segs); j++) {
+			persistent_gnt = info->shadow[i].indirect_grants[j];
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			__free_page(pfn_to_page(persistent_gnt->pfn));
+			kfree(persistent_gnt);
+		}
+
+free_shadow:
+		kfree(info->shadow[i].grants_used);
+		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].indirect_grants);
+		info->shadow[i].indirect_grants = NULL;
+		kfree(info->shadow[i].sg);
+		info->shadow[i].sg = NULL;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -867,12 +984,13 @@
 			     struct blkif_response *bret)
 {
 	int i = 0;
-	struct bio_vec *bvec;
-	struct req_iterator iter;
-	unsigned long flags;
+	struct scatterlist *sg;
 	char *bvec_data;
 	void *shared_data;
-	unsigned int offset = 0;
+	int nseg;
+
+	nseg = s->req.operation == BLKIF_OP_INDIRECT ?
+		s->req.u.indirect.nr_segments : s->req.u.rw.nr_segments;
 
 	if (bret->operation == BLKIF_OP_READ) {
 		/*
@@ -881,26 +999,29 @@
 		 * than PAGE_SIZE, we have to keep track of the current offset,
 		 * to be sure we are copying the data from the right shared page.
 		 */
-		rq_for_each_segment(bvec, s->request, iter) {
-			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
-			if (bvec->bv_offset < offset)
-				i++;
-			BUG_ON(i >= s->req.u.rw.nr_segments);
+		for_each_sg(s->sg, sg, nseg, i) {
+			BUG_ON(sg->offset + sg->length > PAGE_SIZE);
 			shared_data = kmap_atomic(
 				pfn_to_page(s->grants_used[i]->pfn));
-			bvec_data = bvec_kmap_irq(bvec, &flags);
-			memcpy(bvec_data, shared_data + bvec->bv_offset,
-				bvec->bv_len);
-			bvec_kunmap_irq(bvec_data, &flags);
+			bvec_data = kmap_atomic(sg_page(sg));
+			memcpy(bvec_data   + sg->offset,
+			       shared_data + sg->offset,
+			       sg->length);
+			kunmap_atomic(bvec_data);
 			kunmap_atomic(shared_data);
-			offset = bvec->bv_offset + bvec->bv_len;
 		}
 	}
 	/* Add the persistent grant into the list of free grants */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+	for (i = 0; i < nseg; i++) {
 		list_add(&s->grants_used[i]->node, &info->persistent_gnts);
 		info->persistent_gnts_c++;
 	}
+	if (s->req.operation == BLKIF_OP_INDIRECT) {
+		for (i = 0; i < INDIRECT_GREFS(nseg); i++) {
+			list_add(&s->indirect_grants[i]->node, &info->persistent_gnts);
+			info->persistent_gnts_c++;
+		}
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -1034,14 +1155,6 @@
 	SHARED_RING_INIT(sring);
 	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
 
-	sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST);
-
-	/* Allocate memory for grants */
-	err = fill_grant_buffer(info, BLK_RING_SIZE *
-	                              BLKIF_MAX_SEGMENTS_PER_REQUEST);
-	if (err)
-		goto fail;
-
 	err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
 	if (err < 0) {
 		free_page((unsigned long)sring);
@@ -1223,13 +1336,84 @@
 	return 0;
 }
 
+/*
+ * This is a clone of md_trim_bio, used to split a bio into smaller ones
+ */
+static void trim_bio(struct bio *bio, int offset, int size)
+{
+	/* 'bio' is a cloned bio which we need to trim to match
+	 * the given offset and size.
+	 * This requires adjusting bi_sector, bi_size, and bi_io_vec
+	 */
+	int i;
+	struct bio_vec *bvec;
+	int sofar = 0;
+
+	size <<= 9;
+	if (offset == 0 && size == bio->bi_size)
+		return;
+
+	bio->bi_sector += offset;
+	bio->bi_size = size;
+	offset <<= 9;
+	clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+	while (bio->bi_idx < bio->bi_vcnt &&
+	       bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+		/* remove this whole bio_vec */
+		offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+		bio->bi_idx++;
+	}
+	if (bio->bi_idx < bio->bi_vcnt) {
+		bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+		bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+	}
+	/* avoid any complications with bi_idx being non-zero*/
+	if (bio->bi_idx) {
+		memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+			(bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+		bio->bi_vcnt -= bio->bi_idx;
+		bio->bi_idx = 0;
+	}
+	/* Make sure vcnt and last bv are not too big */
+	bio_for_each_segment(bvec, bio, i) {
+		if (sofar + bvec->bv_len > size)
+			bvec->bv_len = size - sofar;
+		if (bvec->bv_len == 0) {
+			bio->bi_vcnt = i;
+			break;
+		}
+		sofar += bvec->bv_len;
+	}
+}
+
+static void split_bio_end(struct bio *bio, int error)
+{
+	struct split_bio *split_bio = bio->bi_private;
+
+	if (error)
+		split_bio->err = error;
+
+	if (atomic_dec_and_test(&split_bio->pending)) {
+		split_bio->bio->bi_phys_segments = 0;
+		bio_endio(split_bio->bio, split_bio->err);
+		kfree(split_bio);
+	}
+	bio_put(bio);
+}
 
 static int blkif_recover(struct blkfront_info *info)
 {
 	int i;
-	struct blkif_request *req;
+	struct request *req, *n;
 	struct blk_shadow *copy;
-	int j;
+	int rc;
+	struct bio *bio, *cloned_bio;
+	struct bio_list bio_list, merge_bio;
+	unsigned int segs, offset;
+	int pending, size;
+	struct split_bio *split_bio;
+	struct list_head requests;
 
 	/* Stage 1: Make a safe copy of the shadow state. */
 	copy = kmemdup(info->shadow, sizeof(info->shadow),
@@ -1244,36 +1428,64 @@
 	info->shadow_free = info->ring.req_prod_pvt;
 	info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff;
 
-	/* Stage 3: Find pending requests and requeue them. */
+	rc = blkfront_setup_indirect(info);
+	if (rc) {
+		kfree(copy);
+		return rc;
+	}
+
+	segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	blk_queue_max_segments(info->rq, segs);
+	bio_list_init(&bio_list);
+	INIT_LIST_HEAD(&requests);
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/* Not in use? */
 		if (!copy[i].request)
 			continue;
 
-		/* Grab a request slot and copy shadow state into it. */
-		req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
-		*req = copy[i].req;
-
-		/* We get a new request id, and must reset the shadow state. */
-		req->u.rw.id = get_id_from_freelist(info);
-		memcpy(&info->shadow[req->u.rw.id], &copy[i], sizeof(copy[i]));
-
-		if (req->operation != BLKIF_OP_DISCARD) {
-		/* Rewrite any grant references invalidated by susp/resume. */
-			for (j = 0; j < req->u.rw.nr_segments; j++)
-				gnttab_grant_foreign_access_ref(
-					req->u.rw.seg[j].gref,
-					info->xbdev->otherend_id,
-					pfn_to_mfn(copy[i].grants_used[j]->pfn),
-					0);
+		/*
+		 * Get the bios in the request so we can re-queue them.
+		 */
+		if (copy[i].request->cmd_flags &
+		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+			/*
+			 * Flush operations don't contain bios, so
+			 * we need to requeue the whole request
+			 */
+			list_add(&copy[i].request->queuelist, &requests);
+			continue;
 		}
-		info->shadow[req->u.rw.id].req = *req;
-
-		info->ring.req_prod_pvt++;
+		merge_bio.head = copy[i].request->bio;
+		merge_bio.tail = copy[i].request->biotail;
+		bio_list_merge(&bio_list, &merge_bio);
+		copy[i].request->bio = NULL;
+		blk_put_request(copy[i].request);
 	}
 
 	kfree(copy);
 
+	/*
+	 * Empty the queue, this is important because we might have
+	 * requests in the queue with more segments than what we
+	 * can handle now.
+	 */
+	spin_lock_irq(&info->io_lock);
+	while ((req = blk_fetch_request(info->rq)) != NULL) {
+		if (req->cmd_flags &
+		    (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+			list_add(&req->queuelist, &requests);
+			continue;
+		}
+		merge_bio.head = req->bio;
+		merge_bio.tail = req->biotail;
+		bio_list_merge(&bio_list, &merge_bio);
+		req->bio = NULL;
+		if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
+			pr_alert("diskcache flush request found!\n");
+		__blk_put_request(info->rq, req);
+	}
+	spin_unlock_irq(&info->io_lock);
+
 	xenbus_switch_state(info->xbdev, XenbusStateConnected);
 
 	spin_lock_irq(&info->io_lock);
@@ -1281,14 +1493,50 @@
 	/* Now safe for us to use the shared ring */
 	info->connected = BLKIF_STATE_CONNECTED;
 
-	/* Send off requeued requests */
-	flush_requests(info);
-
 	/* Kick any other new requests queued since we resumed */
 	kick_pending_request_queues(info);
 
+	list_for_each_entry_safe(req, n, &requests, queuelist) {
+		/* Requeue pending requests (flush or discard) */
+		list_del_init(&req->queuelist);
+		BUG_ON(req->nr_phys_segments > segs);
+		blk_requeue_request(info->rq, req);
+	}
 	spin_unlock_irq(&info->io_lock);
 
+	while ((bio = bio_list_pop(&bio_list)) != NULL) {
+		/* Traverse the list of pending bios and re-queue them */
+		if (bio_segments(bio) > segs) {
+			/*
+			 * This bio has more segments than what we can
+			 * handle, we have to split it.
+			 */
+			pending = (bio_segments(bio) + segs - 1) / segs;
+			split_bio = kzalloc(sizeof(*split_bio), GFP_NOIO);
+			BUG_ON(split_bio == NULL);
+			atomic_set(&split_bio->pending, pending);
+			split_bio->bio = bio;
+			for (i = 0; i < pending; i++) {
+				offset = (i * segs * PAGE_SIZE) >> 9;
+				size = min((unsigned int)(segs * PAGE_SIZE) >> 9,
+					   (unsigned int)(bio->bi_size >> 9) - offset);
+				cloned_bio = bio_clone(bio, GFP_NOIO);
+				BUG_ON(cloned_bio == NULL);
+				trim_bio(cloned_bio, offset, size);
+				cloned_bio->bi_private = split_bio;
+				cloned_bio->bi_end_io = split_bio_end;
+				submit_bio(cloned_bio->bi_rw, cloned_bio);
+			}
+			/*
+			 * Now we have to wait for all those smaller bios to
+			 * end, so we can also end the "parent" bio.
+			 */
+			continue;
+		}
+		/* We don't need to split this bio */
+		submit_bio(bio->bi_rw, bio);
+	}
+
 	return 0;
 }
 
@@ -1308,8 +1556,12 @@
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
 	err = talk_to_blkback(dev, info);
-	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
-		err = blkif_recover(info);
+
+	/*
+	 * We have to wait for the backend to switch to
+	 * connected state, since we want to read which
+	 * features it supports.
+	 */
 
 	return err;
 }
@@ -1387,6 +1639,60 @@
 	kfree(type);
 }
 
+static int blkfront_setup_indirect(struct blkfront_info *info)
+{
+	unsigned int indirect_segments, segs;
+	int err, i;
+
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-max-indirect-segments", "%u", &indirect_segments,
+			    NULL);
+	if (err) {
+		info->max_indirect_segments = 0;
+		segs = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	} else {
+		info->max_indirect_segments = min(indirect_segments,
+						  xen_blkif_max_segments);
+		segs = info->max_indirect_segments;
+	}
+
+	err = fill_grant_buffer(info, (segs + INDIRECT_GREFS(segs)) * BLK_RING_SIZE);
+	if (err)
+		goto out_of_memory;
+
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		info->shadow[i].grants_used = kzalloc(
+			sizeof(info->shadow[i].grants_used[0]) * segs,
+			GFP_NOIO);
+		info->shadow[i].sg = kzalloc(sizeof(info->shadow[i].sg[0]) * segs, GFP_NOIO);
+		if (info->max_indirect_segments)
+			info->shadow[i].indirect_grants = kzalloc(
+				sizeof(info->shadow[i].indirect_grants[0]) *
+				INDIRECT_GREFS(segs),
+				GFP_NOIO);
+		if ((info->shadow[i].grants_used == NULL) ||
+			(info->shadow[i].sg == NULL) ||
+		     (info->max_indirect_segments &&
+		     (info->shadow[i].indirect_grants == NULL)))
+			goto out_of_memory;
+		sg_init_table(info->shadow[i].sg, segs);
+	}
+
+
+	return 0;
+
+out_of_memory:
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		kfree(info->shadow[i].grants_used);
+		info->shadow[i].grants_used = NULL;
+		kfree(info->shadow[i].sg);
+		info->shadow[i].sg = NULL;
+		kfree(info->shadow[i].indirect_grants);
+		info->shadow[i].indirect_grants = NULL;
+	}
+	return -ENOMEM;
+}
+
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
  * the details about the physical device - #sectors, size, etc).
@@ -1395,6 +1701,7 @@
 {
 	unsigned long long sectors;
 	unsigned long sector_size;
+	unsigned int physical_sector_size;
 	unsigned int binfo;
 	int err;
 	int barrier, flush, discard, persistent;
@@ -1414,8 +1721,15 @@
 		set_capacity(info->gd, sectors);
 		revalidate_disk(info->gd);
 
-		/* fall through */
+		return;
 	case BLKIF_STATE_SUSPENDED:
+		/*
+		 * If we are recovering from suspension, we need to wait
+		 * for the backend to announce it's features before
+		 * reconnecting, at least we need to know if the backend
+		 * supports indirect descriptors, and how many.
+		 */
+		blkif_recover(info);
 		return;
 
 	default:
@@ -1437,6 +1751,16 @@
 		return;
 	}
 
+	/*
+	 * physcial-sector-size is a newer field, so old backends may not
+	 * provide this. Assume physical sector size to be the same as
+	 * sector_size in that case.
+	 */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "physical-sector-size", "%u", &physical_sector_size);
+	if (err != 1)
+		physical_sector_size = sector_size;
+
 	info->feature_flush = 0;
 	info->flush_op = 0;
 
@@ -1483,7 +1807,15 @@
 	else
 		info->feature_persistent = persistent;
 
-	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
+	err = blkfront_setup_indirect(info);
+	if (err) {
+		xenbus_dev_fatal(info->xbdev, err, "setup_indirect at %s",
+				 info->xbdev->otherend);
+		return;
+	}
+
+	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
+				  physical_sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 				 info->xbdev->otherend);
diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 11f467c..a12b923 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -91,6 +91,10 @@
 	{ USB_DEVICE(0x0489, 0xe04e) },
 	{ USB_DEVICE(0x0489, 0xe056) },
 	{ USB_DEVICE(0x0489, 0xe04d) },
+	{ USB_DEVICE(0x04c5, 0x1330) },
+	{ USB_DEVICE(0x13d3, 0x3402) },
+	{ USB_DEVICE(0x0cf3, 0x3121) },
+	{ USB_DEVICE(0x0cf3, 0xe003) },
 
 	/* Atheros AR5BBU12 with sflash firmware */
 	{ USB_DEVICE(0x0489, 0xE02C) },
@@ -128,6 +132,10 @@
 	{ USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 },
 
 	/* Atheros AR5BBU22 with sflash firmware */
 	{ USB_DEVICE(0x0489, 0xE03C), .driver_info = BTUSB_ATH3012 },
@@ -193,24 +201,44 @@
 
 static int ath3k_get_state(struct usb_device *udev, unsigned char *state)
 {
-	int pipe = 0;
+	int ret, pipe = 0;
+	char *buf;
+
+	buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
 
 	pipe = usb_rcvctrlpipe(udev, 0);
-	return usb_control_msg(udev, pipe, ATH3K_GETSTATE,
-			USB_TYPE_VENDOR | USB_DIR_IN, 0, 0,
-			state, 0x01, USB_CTRL_SET_TIMEOUT);
+	ret = usb_control_msg(udev, pipe, ATH3K_GETSTATE,
+			      USB_TYPE_VENDOR | USB_DIR_IN, 0, 0,
+			      buf, sizeof(*buf), USB_CTRL_SET_TIMEOUT);
+
+	*state = *buf;
+	kfree(buf);
+
+	return ret;
 }
 
 static int ath3k_get_version(struct usb_device *udev,
 			struct ath3k_version *version)
 {
-	int pipe = 0;
+	int ret, pipe = 0;
+	struct ath3k_version *buf;
+	const int size = sizeof(*buf);
+
+	buf = kmalloc(size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
 
 	pipe = usb_rcvctrlpipe(udev, 0);
-	return usb_control_msg(udev, pipe, ATH3K_GETVERSION,
-			USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, version,
-			sizeof(struct ath3k_version),
-			USB_CTRL_SET_TIMEOUT);
+	ret = usb_control_msg(udev, pipe, ATH3K_GETVERSION,
+			      USB_TYPE_VENDOR | USB_DIR_IN, 0, 0,
+			      buf, size, USB_CTRL_SET_TIMEOUT);
+
+	memcpy(version, buf, size);
+	kfree(buf);
+
+	return ret;
 }
 
 static int ath3k_load_fwfile(struct usb_device *udev,
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index de4cf4d..8e16f0a 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -154,6 +154,10 @@
 	{ USB_DEVICE(0x0489, 0xe04e), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0489, 0xe056), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0489, 0xe04d), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04c5, 0x1330), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x13d3, 0x3402), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 },
 
 	/* Atheros AR5BBU12 with sflash firmware */
 	{ USB_DEVICE(0x0489, 0xe02c), .driver_info = BTUSB_IGNORE },
@@ -1095,7 +1099,7 @@
 	if (IS_ERR(skb)) {
 		BT_ERR("%s sending Intel patch command (0x%4.4x) failed (%ld)",
 		       hdev->name, cmd->opcode, PTR_ERR(skb));
-		return -PTR_ERR(skb);
+		return PTR_ERR(skb);
 	}
 
 	/* It ensures that the returned event matches the event data read from
@@ -1147,7 +1151,7 @@
 	if (IS_ERR(skb)) {
 		BT_ERR("%s sending initial HCI reset command failed (%ld)",
 		       hdev->name, PTR_ERR(skb));
-		return -PTR_ERR(skb);
+		return PTR_ERR(skb);
 	}
 	kfree_skb(skb);
 
@@ -1161,7 +1165,7 @@
 	if (IS_ERR(skb)) {
 		BT_ERR("%s reading Intel fw version command failed (%ld)",
 		       hdev->name, PTR_ERR(skb));
-		return -PTR_ERR(skb);
+		return PTR_ERR(skb);
 	}
 
 	if (skb->len != sizeof(*ver)) {
@@ -1219,7 +1223,7 @@
 		BT_ERR("%s entering Intel manufacturer mode failed (%ld)",
 		       hdev->name, PTR_ERR(skb));
 		release_firmware(fw);
-		return -PTR_ERR(skb);
+		return PTR_ERR(skb);
 	}
 
 	if (skb->data[0]) {
@@ -1276,7 +1280,7 @@
 	if (IS_ERR(skb)) {
 		BT_ERR("%s exiting Intel manufacturer mode failed (%ld)",
 		       hdev->name, PTR_ERR(skb));
-		return -PTR_ERR(skb);
+		return PTR_ERR(skb);
 	}
 	kfree_skb(skb);
 
@@ -1292,7 +1296,7 @@
 	if (IS_ERR(skb)) {
 		BT_ERR("%s exiting Intel manufacturer mode failed (%ld)",
 		       hdev->name, PTR_ERR(skb));
-		return -PTR_ERR(skb);
+		return PTR_ERR(skb);
 	}
 	kfree_skb(skb);
 
@@ -1310,7 +1314,7 @@
 	if (IS_ERR(skb)) {
 		BT_ERR("%s exiting Intel manufacturer mode failed (%ld)",
 		       hdev->name, PTR_ERR(skb));
-		return -PTR_ERR(skb);
+		return PTR_ERR(skb);
 	}
 	kfree_skb(skb);
 
diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c
index bf5d247..15f2e70 100644
--- a/drivers/char/agp/parisc-agp.c
+++ b/drivers/char/agp/parisc-agp.c
@@ -129,7 +129,8 @@
 	off_t j, io_pg_start;
 	int io_pg_count;
 
-	if (type != 0 || mem->type != 0) {
+	if (type != mem->type ||
+		agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) {
 		return -EINVAL;
 	}
 
@@ -175,7 +176,8 @@
 	struct _parisc_agp_info *info = &parisc_agp_info;
 	int i, io_pg_start, io_pg_count;
 
-	if (type != 0 || mem->type != 0) {
+	if (type != mem->type ||
+		agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) {
 		return -EINVAL;
 	}
 
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 1b456fe..fc45567 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -272,9 +272,12 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&portdev->ports_lock, flags);
-	list_for_each_entry(port, &portdev->ports, list)
-		if (port->cdev->dev == dev)
+	list_for_each_entry(port, &portdev->ports, list) {
+		if (port->cdev->dev == dev) {
+			kref_get(&port->kref);
 			goto out;
+		}
+	}
 	port = NULL;
 out:
 	spin_unlock_irqrestore(&portdev->ports_lock, flags);
@@ -746,6 +749,10 @@
 
 	port = filp->private_data;
 
+	/* Port is hot-unplugged. */
+	if (!port->guest_connected)
+		return -ENODEV;
+
 	if (!port_has_data(port)) {
 		/*
 		 * If nothing's connected on the host just return 0 in
@@ -762,7 +769,7 @@
 		if (ret < 0)
 			return ret;
 	}
-	/* Port got hot-unplugged. */
+	/* Port got hot-unplugged while we were waiting above. */
 	if (!port->guest_connected)
 		return -ENODEV;
 	/*
@@ -932,13 +939,25 @@
 	if (is_rproc_serial(port->out_vq->vdev))
 		return -EINVAL;
 
+	/*
+	 * pipe->nrbufs == 0 means there are no data to transfer,
+	 * so this returns just 0 for no data.
+	 */
+	pipe_lock(pipe);
+	if (!pipe->nrbufs) {
+		ret = 0;
+		goto error_out;
+	}
+
 	ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK);
 	if (ret < 0)
-		return ret;
+		goto error_out;
 
 	buf = alloc_buf(port->out_vq, 0, pipe->nrbufs);
-	if (!buf)
-		return -ENOMEM;
+	if (!buf) {
+		ret = -ENOMEM;
+		goto error_out;
+	}
 
 	sgl.n = 0;
 	sgl.len = 0;
@@ -946,12 +965,17 @@
 	sgl.sg = buf->sg;
 	sg_init_table(sgl.sg, sgl.size);
 	ret = __splice_from_pipe(pipe, &sd, pipe_to_sg);
+	pipe_unlock(pipe);
 	if (likely(ret > 0))
 		ret = __send_to_port(port, buf->sg, sgl.n, sgl.len, buf, true);
 
 	if (unlikely(ret <= 0))
 		free_buf(buf, true);
 	return ret;
+
+error_out:
+	pipe_unlock(pipe);
+	return ret;
 }
 
 static unsigned int port_fops_poll(struct file *filp, poll_table *wait)
@@ -1019,14 +1043,14 @@
 	struct port *port;
 	int ret;
 
+	/* We get the port with a kref here */
 	port = find_port_by_devt(cdev->dev);
+	if (!port) {
+		/* Port was unplugged before we could proceed */
+		return -ENXIO;
+	}
 	filp->private_data = port;
 
-	/* Prevent against a port getting hot-unplugged at the same time */
-	spin_lock_irq(&port->portdev->ports_lock);
-	kref_get(&port->kref);
-	spin_unlock_irq(&port->portdev->ports_lock);
-
 	/*
 	 * Don't allow opening of console port devices -- that's done
 	 * via /dev/hvc
@@ -1498,14 +1522,6 @@
 
 	port = container_of(kref, struct port, kref);
 
-	sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
-	device_destroy(pdrvdata.class, port->dev->devt);
-	cdev_del(port->cdev);
-
-	kfree(port->name);
-
-	debugfs_remove(port->debugfs_file);
-
 	kfree(port);
 }
 
@@ -1539,12 +1555,14 @@
 	spin_unlock_irq(&port->portdev->ports_lock);
 
 	if (port->guest_connected) {
-		port->guest_connected = false;
-		port->host_connected = false;
-		wake_up_interruptible(&port->waitqueue);
-
 		/* Let the app know the port is going down. */
 		send_sigio_to_port(port);
+
+		/* Do this after sigio is actually sent */
+		port->guest_connected = false;
+		port->host_connected = false;
+
+		wake_up_interruptible(&port->waitqueue);
 	}
 
 	if (is_console_port(port)) {
@@ -1563,6 +1581,14 @@
 	 */
 	port->portdev = NULL;
 
+	sysfs_remove_group(&port->dev->kobj, &port_attribute_group);
+	device_destroy(pdrvdata.class, port->dev->devt);
+	cdev_del(port->cdev);
+
+	kfree(port->name);
+
+	debugfs_remove(port->debugfs_file);
+
 	/*
 	 * Locks around here are not necessary - a port can't be
 	 * opened after we removed the port struct from ports_list
diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c
index 1bdb882..4e57397 100644
--- a/drivers/clk/samsung/clk-exynos4.c
+++ b/drivers/clk/samsung/clk-exynos4.c
@@ -581,11 +581,15 @@
 	DIV(none, "div_spi1_isp", "mout_spi1_isp", E4X12_DIV_ISP, 16, 4),
 	DIV(none, "div_spi1_isp_pre", "div_spi1_isp", E4X12_DIV_ISP, 20, 8),
 	DIV(none, "div_uart_isp", "mout_uart_isp", E4X12_DIV_ISP, 28, 4),
-	DIV(div_isp0, "div_isp0", "aclk200", E4X12_DIV_ISP0, 0, 3),
-	DIV(div_isp1, "div_isp1", "aclk200", E4X12_DIV_ISP0, 4, 3),
+	DIV_F(div_isp0, "div_isp0", "aclk200", E4X12_DIV_ISP0, 0, 3,
+						CLK_GET_RATE_NOCACHE, 0),
+	DIV_F(div_isp1, "div_isp1", "aclk200", E4X12_DIV_ISP0, 4, 3,
+						CLK_GET_RATE_NOCACHE, 0),
 	DIV(none, "div_mpwm", "div_isp1", E4X12_DIV_ISP1, 0, 3),
-	DIV(div_mcuisp0, "div_mcuisp0", "aclk400_mcuisp", E4X12_DIV_ISP1, 4, 3),
-	DIV(div_mcuisp1, "div_mcuisp1", "div_mcuisp0", E4X12_DIV_ISP1, 8, 3),
+	DIV_F(div_mcuisp0, "div_mcuisp0", "aclk400_mcuisp", E4X12_DIV_ISP1,
+						4, 3, CLK_GET_RATE_NOCACHE, 0),
+	DIV_F(div_mcuisp1, "div_mcuisp1", "div_mcuisp0", E4X12_DIV_ISP1,
+						8, 3, CLK_GET_RATE_NOCACHE, 0),
 	DIV(sclk_fimg2d, "sclk_fimg2d", "mout_g2d", DIV_DMC1, 0, 4),
 };
 
@@ -863,57 +867,57 @@
 	GATE_DA(i2s0, "samsung-i2s.0", "i2s0", "aclk100",
 			E4X12_GATE_IP_MAUDIO, 3, 0, 0, "iis"),
 	GATE(fimc_isp, "isp", "aclk200", E4X12_GATE_ISP0, 0,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(fimc_drc, "drc", "aclk200", E4X12_GATE_ISP0, 1,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(fimc_fd, "fd", "aclk200", E4X12_GATE_ISP0, 2,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(fimc_lite0, "lite0", "aclk200", E4X12_GATE_ISP0, 3,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(fimc_lite1, "lite1", "aclk200", E4X12_GATE_ISP0, 4,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(mcuisp, "mcuisp", "aclk200", E4X12_GATE_ISP0, 5,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(gicisp, "gicisp", "aclk200", E4X12_GATE_ISP0, 7,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(smmu_isp, "smmu_isp", "aclk200", E4X12_GATE_ISP0, 8,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(smmu_drc, "smmu_drc", "aclk200", E4X12_GATE_ISP0, 9,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(smmu_fd, "smmu_fd", "aclk200", E4X12_GATE_ISP0, 10,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(smmu_lite0, "smmu_lite0", "aclk200", E4X12_GATE_ISP0, 11,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(smmu_lite1, "smmu_lite1", "aclk200", E4X12_GATE_ISP0, 12,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(ppmuispmx, "ppmuispmx", "aclk200", E4X12_GATE_ISP0, 20,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(ppmuispx, "ppmuispx", "aclk200", E4X12_GATE_ISP0, 21,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(mcuctl_isp, "mcuctl_isp", "aclk200", E4X12_GATE_ISP0, 23,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(mpwm_isp, "mpwm_isp", "aclk200", E4X12_GATE_ISP0, 24,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(i2c0_isp, "i2c0_isp", "aclk200", E4X12_GATE_ISP0, 25,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(i2c1_isp, "i2c1_isp", "aclk200", E4X12_GATE_ISP0, 26,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(mtcadc_isp, "mtcadc_isp", "aclk200", E4X12_GATE_ISP0, 27,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(pwm_isp, "pwm_isp", "aclk200", E4X12_GATE_ISP0, 28,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(wdt_isp, "wdt_isp", "aclk200", E4X12_GATE_ISP0, 30,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(uart_isp, "uart_isp", "aclk200", E4X12_GATE_ISP0, 31,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(asyncaxim, "asyncaxim", "aclk200", E4X12_GATE_ISP1, 0,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(smmu_ispcx, "smmu_ispcx", "aclk200", E4X12_GATE_ISP1, 4,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(spi0_isp, "spi0_isp", "aclk200", E4X12_GATE_ISP1, 12,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(spi1_isp, "spi1_isp", "aclk200", E4X12_GATE_ISP1, 13,
-			CLK_IGNORE_UNUSED, 0),
+			CLK_IGNORE_UNUSED | CLK_GET_RATE_NOCACHE, 0),
 	GATE(g2d, "g2d", "aclk200", GATE_IP_DMC, 23, 0, 0),
 };
 
diff --git a/drivers/clk/zynq/clkc.c b/drivers/clk/zynq/clkc.c
index 5c205b6..089d3e3 100644
--- a/drivers/clk/zynq/clkc.c
+++ b/drivers/clk/zynq/clkc.c
@@ -71,6 +71,7 @@
 static DEFINE_SPINLOCK(ddrpll_lock);
 static DEFINE_SPINLOCK(iopll_lock);
 static DEFINE_SPINLOCK(armclk_lock);
+static DEFINE_SPINLOCK(swdtclk_lock);
 static DEFINE_SPINLOCK(ddrclk_lock);
 static DEFINE_SPINLOCK(dciclk_lock);
 static DEFINE_SPINLOCK(gem0clk_lock);
@@ -293,7 +294,7 @@
 	}
 	clks[swdt] = clk_register_mux(NULL, clk_output_name[swdt],
 			swdt_ext_clk_mux_parents, 2, CLK_SET_RATE_PARENT,
-			SLCR_SWDT_CLK_SEL, 0, 1, 0, &gem0clk_lock);
+			SLCR_SWDT_CLK_SEL, 0, 1, 0, &swdtclk_lock);
 
 	/* DDR clocks */
 	clk = clk_register_divider(NULL, "ddr2x_div", "ddrpll", 0,
@@ -364,8 +365,9 @@
 			CLK_SET_RATE_PARENT, SLCR_GEM0_CLK_CTRL, 20, 6,
 			CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
 			&gem0clk_lock);
-	clk = clk_register_mux(NULL, "gem0_emio_mux", gem0_mux_parents, 2, 0,
-			SLCR_GEM0_CLK_CTRL, 6, 1, 0, &gem0clk_lock);
+	clk = clk_register_mux(NULL, "gem0_emio_mux", gem0_mux_parents, 2,
+			CLK_SET_RATE_PARENT, SLCR_GEM0_CLK_CTRL, 6, 1, 0,
+			&gem0clk_lock);
 	clks[gem0] = clk_register_gate(NULL, clk_output_name[gem0],
 			"gem0_emio_mux", CLK_SET_RATE_PARENT,
 			SLCR_GEM0_CLK_CTRL, 0, 0, &gem0clk_lock);
@@ -386,8 +388,9 @@
 			CLK_SET_RATE_PARENT, SLCR_GEM1_CLK_CTRL, 20, 6,
 			CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO,
 			&gem1clk_lock);
-	clk = clk_register_mux(NULL, "gem1_emio_mux", gem1_mux_parents, 2, 0,
-			SLCR_GEM1_CLK_CTRL, 6, 1, 0, &gem1clk_lock);
+	clk = clk_register_mux(NULL, "gem1_emio_mux", gem1_mux_parents, 2,
+			CLK_SET_RATE_PARENT, SLCR_GEM1_CLK_CTRL, 6, 1, 0,
+			&gem1clk_lock);
 	clks[gem1] = clk_register_gate(NULL, clk_output_name[gem1],
 			"gem1_emio_mux", CLK_SET_RATE_PARENT,
 			SLCR_GEM1_CLK_CTRL, 0, 0, &gem1clk_lock);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a4ad733..f0a5e2b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1177,14 +1177,11 @@
 				__func__, cpu_dev->id, cpu);
 	}
 
-	if ((cpus == 1) && (cpufreq_driver->target))
-		__cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
-
-	pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
-	cpufreq_cpu_put(data);
-
 	/* If cpu is last user of policy, free policy */
 	if (cpus == 1) {
+		if (cpufreq_driver->target)
+			__cpufreq_governor(data, CPUFREQ_GOV_POLICY_EXIT);
+
 		lock_policy_rwsem_read(cpu);
 		kobj = &data->kobj;
 		cmp = &data->kobj_unregister;
@@ -1205,9 +1202,13 @@
 		free_cpumask_var(data->related_cpus);
 		free_cpumask_var(data->cpus);
 		kfree(data);
-	} else if (cpufreq_driver->target) {
-		__cpufreq_governor(data, CPUFREQ_GOV_START);
-		__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+	} else {
+		pr_debug("%s: removing link, cpu: %d\n", __func__, cpu);
+		cpufreq_cpu_put(data);
+		if (cpufreq_driver->target) {
+			__cpufreq_governor(data, CPUFREQ_GOV_START);
+			__cpufreq_governor(data, CPUFREQ_GOV_LIMITS);
+		}
 	}
 
 	per_cpu(cpufreq_policy_cpu, cpu) = -1;
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 0ceb2ef..f97cb3d 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -221,8 +221,8 @@
 	return count;
 }
 
-static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
+		const char *buf, size_t count)
 {
 	struct cs_dbs_tuners *cs_tuners = dbs_data->tuners;
 	unsigned int input, j;
@@ -235,10 +235,10 @@
 	if (input > 1)
 		input = 1;
 
-	if (input == cs_tuners->ignore_nice) /* nothing to do */
+	if (input == cs_tuners->ignore_nice_load) /* nothing to do */
 		return count;
 
-	cs_tuners->ignore_nice = input;
+	cs_tuners->ignore_nice_load = input;
 
 	/* we need to re-evaluate prev_cpu_idle */
 	for_each_online_cpu(j) {
@@ -246,7 +246,7 @@
 		dbs_info = &per_cpu(cs_cpu_dbs_info, j);
 		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
 					&dbs_info->cdbs.prev_cpu_wall, 0);
-		if (cs_tuners->ignore_nice)
+		if (cs_tuners->ignore_nice_load)
 			dbs_info->cdbs.prev_cpu_nice =
 				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 	}
@@ -279,7 +279,7 @@
 show_store_one(cs, sampling_down_factor);
 show_store_one(cs, up_threshold);
 show_store_one(cs, down_threshold);
-show_store_one(cs, ignore_nice);
+show_store_one(cs, ignore_nice_load);
 show_store_one(cs, freq_step);
 declare_show_sampling_rate_min(cs);
 
@@ -287,7 +287,7 @@
 gov_sys_pol_attr_rw(sampling_down_factor);
 gov_sys_pol_attr_rw(up_threshold);
 gov_sys_pol_attr_rw(down_threshold);
-gov_sys_pol_attr_rw(ignore_nice);
+gov_sys_pol_attr_rw(ignore_nice_load);
 gov_sys_pol_attr_rw(freq_step);
 gov_sys_pol_attr_ro(sampling_rate_min);
 
@@ -297,7 +297,7 @@
 	&sampling_down_factor_gov_sys.attr,
 	&up_threshold_gov_sys.attr,
 	&down_threshold_gov_sys.attr,
-	&ignore_nice_gov_sys.attr,
+	&ignore_nice_load_gov_sys.attr,
 	&freq_step_gov_sys.attr,
 	NULL
 };
@@ -313,7 +313,7 @@
 	&sampling_down_factor_gov_pol.attr,
 	&up_threshold_gov_pol.attr,
 	&down_threshold_gov_pol.attr,
-	&ignore_nice_gov_pol.attr,
+	&ignore_nice_load_gov_pol.attr,
 	&freq_step_gov_pol.attr,
 	NULL
 };
@@ -338,7 +338,7 @@
 	tuners->up_threshold = DEF_FREQUENCY_UP_THRESHOLD;
 	tuners->down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD;
 	tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
-	tuners->ignore_nice = 0;
+	tuners->ignore_nice_load = 0;
 	tuners->freq_step = DEF_FREQUENCY_STEP;
 
 	dbs_data->tuners = tuners;
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 7b839a8..e59afaa 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -47,9 +47,9 @@
 	unsigned int j;
 
 	if (dbs_data->cdata->governor == GOV_ONDEMAND)
-		ignore_nice = od_tuners->ignore_nice;
+		ignore_nice = od_tuners->ignore_nice_load;
 	else
-		ignore_nice = cs_tuners->ignore_nice;
+		ignore_nice = cs_tuners->ignore_nice_load;
 
 	policy = cdbs->cur_policy;
 
@@ -298,12 +298,12 @@
 		cs_tuners = dbs_data->tuners;
 		cs_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
 		sampling_rate = cs_tuners->sampling_rate;
-		ignore_nice = cs_tuners->ignore_nice;
+		ignore_nice = cs_tuners->ignore_nice_load;
 	} else {
 		od_tuners = dbs_data->tuners;
 		od_dbs_info = dbs_data->cdata->get_cpu_dbs_info_s(cpu);
 		sampling_rate = od_tuners->sampling_rate;
-		ignore_nice = od_tuners->ignore_nice;
+		ignore_nice = od_tuners->ignore_nice_load;
 		od_ops = dbs_data->cdata->gov_ops;
 		io_busy = od_tuners->io_is_busy;
 	}
diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h
index 6663ec3..d5f12b4 100644
--- a/drivers/cpufreq/cpufreq_governor.h
+++ b/drivers/cpufreq/cpufreq_governor.h
@@ -165,7 +165,7 @@
 
 /* Per policy Governers sysfs tunables */
 struct od_dbs_tuners {
-	unsigned int ignore_nice;
+	unsigned int ignore_nice_load;
 	unsigned int sampling_rate;
 	unsigned int sampling_down_factor;
 	unsigned int up_threshold;
@@ -175,7 +175,7 @@
 };
 
 struct cs_dbs_tuners {
-	unsigned int ignore_nice;
+	unsigned int ignore_nice_load;
 	unsigned int sampling_rate;
 	unsigned int sampling_down_factor;
 	unsigned int up_threshold;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 93eb5cb..c087347 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -403,8 +403,8 @@
 	return count;
 }
 
-static ssize_t store_ignore_nice(struct dbs_data *dbs_data, const char *buf,
-		size_t count)
+static ssize_t store_ignore_nice_load(struct dbs_data *dbs_data,
+		const char *buf, size_t count)
 {
 	struct od_dbs_tuners *od_tuners = dbs_data->tuners;
 	unsigned int input;
@@ -419,10 +419,10 @@
 	if (input > 1)
 		input = 1;
 
-	if (input == od_tuners->ignore_nice) { /* nothing to do */
+	if (input == od_tuners->ignore_nice_load) { /* nothing to do */
 		return count;
 	}
-	od_tuners->ignore_nice = input;
+	od_tuners->ignore_nice_load = input;
 
 	/* we need to re-evaluate prev_cpu_idle */
 	for_each_online_cpu(j) {
@@ -430,7 +430,7 @@
 		dbs_info = &per_cpu(od_cpu_dbs_info, j);
 		dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j,
 			&dbs_info->cdbs.prev_cpu_wall, od_tuners->io_is_busy);
-		if (od_tuners->ignore_nice)
+		if (od_tuners->ignore_nice_load)
 			dbs_info->cdbs.prev_cpu_nice =
 				kcpustat_cpu(j).cpustat[CPUTIME_NICE];
 
@@ -461,7 +461,7 @@
 show_store_one(od, io_is_busy);
 show_store_one(od, up_threshold);
 show_store_one(od, sampling_down_factor);
-show_store_one(od, ignore_nice);
+show_store_one(od, ignore_nice_load);
 show_store_one(od, powersave_bias);
 declare_show_sampling_rate_min(od);
 
@@ -469,7 +469,7 @@
 gov_sys_pol_attr_rw(io_is_busy);
 gov_sys_pol_attr_rw(up_threshold);
 gov_sys_pol_attr_rw(sampling_down_factor);
-gov_sys_pol_attr_rw(ignore_nice);
+gov_sys_pol_attr_rw(ignore_nice_load);
 gov_sys_pol_attr_rw(powersave_bias);
 gov_sys_pol_attr_ro(sampling_rate_min);
 
@@ -478,7 +478,7 @@
 	&sampling_rate_gov_sys.attr,
 	&up_threshold_gov_sys.attr,
 	&sampling_down_factor_gov_sys.attr,
-	&ignore_nice_gov_sys.attr,
+	&ignore_nice_load_gov_sys.attr,
 	&powersave_bias_gov_sys.attr,
 	&io_is_busy_gov_sys.attr,
 	NULL
@@ -494,7 +494,7 @@
 	&sampling_rate_gov_pol.attr,
 	&up_threshold_gov_pol.attr,
 	&sampling_down_factor_gov_pol.attr,
-	&ignore_nice_gov_pol.attr,
+	&ignore_nice_load_gov_pol.attr,
 	&powersave_bias_gov_pol.attr,
 	&io_is_busy_gov_pol.attr,
 	NULL
@@ -544,7 +544,7 @@
 	}
 
 	tuners->sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR;
-	tuners->ignore_nice = 0;
+	tuners->ignore_nice_load = 0;
 	tuners->powersave_bias = default_powersave_bias;
 	tuners->io_is_busy = should_io_be_busy();
 
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b012d76..7cde885 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -103,10 +103,10 @@
 static struct pstate_adjust_policy default_policy = {
 	.sample_rate_ms = 10,
 	.deadband = 0,
-	.setpoint = 109,
-	.p_gain_pct = 17,
+	.setpoint = 97,
+	.p_gain_pct = 20,
 	.d_gain_pct = 0,
-	.i_gain_pct = 4,
+	.i_gain_pct = 0,
 };
 
 struct perf_limits {
@@ -468,12 +468,12 @@
 static inline int intel_pstate_get_scaled_busy(struct cpudata *cpu)
 {
 	int32_t busy_scaled;
-	int32_t core_busy, turbo_pstate, current_pstate;
+	int32_t core_busy, max_pstate, current_pstate;
 
 	core_busy = int_tofp(cpu->samples[cpu->sample_ptr].core_pct_busy);
-	turbo_pstate = int_tofp(cpu->pstate.turbo_pstate);
+	max_pstate = int_tofp(cpu->pstate.max_pstate);
 	current_pstate = int_tofp(cpu->pstate.current_pstate);
-	busy_scaled = mul_fp(core_busy, div_fp(turbo_pstate, current_pstate));
+	busy_scaled = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
 	return fp_toint(busy_scaled);
 }
diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c
index bb838b9..9536852c 100644
--- a/drivers/cpufreq/loongson2_cpufreq.c
+++ b/drivers/cpufreq/loongson2_cpufreq.c
@@ -118,11 +118,6 @@
 		clk_put(cpuclk);
 		return -EINVAL;
 	}
-	ret = clk_set_rate(cpuclk, rate);
-	if (ret) {
-		clk_put(cpuclk);
-		return ret;
-	}
 
 	/* clock table init */
 	for (i = 2;
@@ -130,6 +125,12 @@
 	     i++)
 		loongson2_clockmod_table[i].frequency = (rate * i) / 8;
 
+	ret = clk_set_rate(cpuclk, rate);
+	if (ret) {
+		clk_put(cpuclk);
+		return ret;
+	}
+
 	policy->cur = loongson2_cpufreq_get(policy->cpu);
 
 	cpufreq_frequency_table_get_attr(&loongson2_clockmod_table[0],
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index fe343a0..bc580b6 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -28,13 +28,6 @@
 #define MAX_INTERESTING 50000
 #define STDDEV_THRESH 400
 
-/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */
-#define MAX_DEVIATION 60
-
-static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer);
-static DEFINE_PER_CPU(int, hrtimer_status);
-/* menu hrtimer mode */
-enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL};
 
 /*
  * Concepts and ideas behind the menu governor
@@ -116,13 +109,6 @@
  *
  */
 
-/*
- * The C-state residency is so long that is is worthwhile to exit
- * from the shallow C-state and re-enter into a deeper C-state.
- */
-static unsigned int perfect_cstate_ms __read_mostly = 30;
-module_param(perfect_cstate_ms, uint, 0000);
-
 struct menu_device {
 	int		last_state_idx;
 	int             needs_update;
@@ -205,52 +191,17 @@
 	return div_u64(dividend + (divisor / 2), divisor);
 }
 
-/* Cancel the hrtimer if it is not triggered yet */
-void menu_hrtimer_cancel(void)
-{
-	int cpu = smp_processor_id();
-	struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
-
-	/* The timer is still not time out*/
-	if (per_cpu(hrtimer_status, cpu)) {
-		hrtimer_cancel(hrtmr);
-		per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
-	}
-}
-EXPORT_SYMBOL_GPL(menu_hrtimer_cancel);
-
-/* Call back for hrtimer is triggered */
-static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer)
-{
-	int cpu = smp_processor_id();
-	struct menu_device *data = &per_cpu(menu_devices, cpu);
-
-	/* In general case, the expected residency is much larger than
-	 *  deepest C-state target residency, but prediction logic still
-	 *  predicts a small predicted residency, so the prediction
-	 *  history is totally broken if the timer is triggered.
-	 *  So reset the correction factor.
-	 */
-	if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL)
-		data->correction_factor[data->bucket] = RESOLUTION * DECAY;
-
-	per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
-
-	return HRTIMER_NORESTART;
-}
-
 /*
  * Try detecting repeating patterns by keeping track of the last 8
  * intervals, and checking if the standard deviation of that set
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static u32 get_typical_interval(struct menu_device *data)
+static void get_typical_interval(struct menu_device *data)
 {
 	int i = 0, divisor = 0;
 	uint64_t max = 0, avg = 0, stddev = 0;
 	int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */
-	unsigned int ret = 0;
 
 again:
 
@@ -291,16 +242,13 @@
 	if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
 							|| stddev <= 20) {
 		data->predicted_us = avg;
-		ret = 1;
-		return ret;
+		return;
 
 	} else if ((divisor * 4) > INTERVALS * 3) {
 		/* Exclude the max interval */
 		thresh = max - 1;
 		goto again;
 	}
-
-	return ret;
 }
 
 /**
@@ -315,9 +263,6 @@
 	int i;
 	int multiplier;
 	struct timespec t;
-	int repeat = 0, low_predicted = 0;
-	int cpu = smp_processor_id();
-	struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
 
 	if (data->needs_update) {
 		menu_update(drv, dev);
@@ -352,7 +297,7 @@
 	data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
 					 RESOLUTION * DECAY);
 
-	repeat = get_typical_interval(data);
+	get_typical_interval(data);
 
 	/*
 	 * We want to default to C1 (hlt), not to busy polling
@@ -373,10 +318,8 @@
 
 		if (s->disabled || su->disable)
 			continue;
-		if (s->target_residency > data->predicted_us) {
-			low_predicted = 1;
+		if (s->target_residency > data->predicted_us)
 			continue;
-		}
 		if (s->exit_latency > latency_req)
 			continue;
 		if (s->exit_latency * multiplier > data->predicted_us)
@@ -386,44 +329,6 @@
 		data->exit_us = s->exit_latency;
 	}
 
-	/* not deepest C-state chosen for low predicted residency */
-	if (low_predicted) {
-		unsigned int timer_us = 0;
-		unsigned int perfect_us = 0;
-
-		/*
-		 * Set a timer to detect whether this sleep is much
-		 * longer than repeat mode predicted.  If the timer
-		 * triggers, the code will evaluate whether to put
-		 * the CPU into a deeper C-state.
-		 * The timer is cancelled on CPU wakeup.
-		 */
-		timer_us = 2 * (data->predicted_us + MAX_DEVIATION);
-
-		perfect_us = perfect_cstate_ms * 1000;
-
-		if (repeat && (4 * timer_us < data->expected_us)) {
-			RCU_NONIDLE(hrtimer_start(hrtmr,
-				ns_to_ktime(1000 * timer_us),
-				HRTIMER_MODE_REL_PINNED));
-			/* In repeat case, menu hrtimer is started */
-			per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT;
-		} else if (perfect_us < data->expected_us) {
-			/*
-			 * The next timer is long. This could be because
-			 * we did not make a useful prediction.
-			 * In that case, it makes sense to re-enter
-			 * into a deeper C-state after some time.
-			 */
-			RCU_NONIDLE(hrtimer_start(hrtmr,
-				ns_to_ktime(1000 * timer_us),
-				HRTIMER_MODE_REL_PINNED));
-			/* In general case, menu hrtimer is started */
-			per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL;
-		}
-
-	}
-
 	return data->last_state_idx;
 }
 
@@ -514,9 +419,6 @@
 				struct cpuidle_device *dev)
 {
 	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
-	struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu);
-	hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	t->function = menu_hrtimer_notify;
 
 	memset(data, 0, sizeof(struct menu_device));
 
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 5996521..84573b4 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -429,7 +429,7 @@
 	dma_addr_t src_dma, dst_dma;
 	int ret = 0;
 
-	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
+	desc = kmalloc(CAAM_CMD_SZ * 8 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
 	if (!desc) {
 		dev_err(jrdev, "unable to allocate key input memory\n");
 		return -ENOMEM;
diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c
index ce3dc3e..0bbdea50 100644
--- a/drivers/dma/pch_dma.c
+++ b/drivers/dma/pch_dma.c
@@ -867,6 +867,7 @@
 
 	if (!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
 		dev_err(&pdev->dev, "Cannot find proper base address\n");
+		err = -ENODEV;
 		goto err_disable_pdev;
 	}
 
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 593827b..fa645d8 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2505,6 +2505,10 @@
 	/* Assign cookies to all nodes */
 	while (!list_empty(&last->node)) {
 		desc = list_entry(last->node.next, struct dma_pl330_desc, node);
+		if (pch->cyclic) {
+			desc->txd.callback = last->txd.callback;
+			desc->txd.callback_param = last->txd.callback_param;
+		}
 
 		dma_cookie_assign(&desc->txd);
 
@@ -2688,45 +2692,82 @@
 		size_t period_len, enum dma_transfer_direction direction,
 		unsigned long flags, void *context)
 {
-	struct dma_pl330_desc *desc;
+	struct dma_pl330_desc *desc = NULL, *first = NULL;
 	struct dma_pl330_chan *pch = to_pchan(chan);
+	struct dma_pl330_dmac *pdmac = pch->dmac;
+	unsigned int i;
 	dma_addr_t dst;
 	dma_addr_t src;
 
-	desc = pl330_get_desc(pch);
-	if (!desc) {
-		dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n",
-			__func__, __LINE__);
+	if (len % period_len != 0)
 		return NULL;
-	}
 
-	switch (direction) {
-	case DMA_MEM_TO_DEV:
-		desc->rqcfg.src_inc = 1;
-		desc->rqcfg.dst_inc = 0;
-		desc->req.rqtype = MEMTODEV;
-		src = dma_addr;
-		dst = pch->fifo_addr;
-		break;
-	case DMA_DEV_TO_MEM:
-		desc->rqcfg.src_inc = 0;
-		desc->rqcfg.dst_inc = 1;
-		desc->req.rqtype = DEVTOMEM;
-		src = pch->fifo_addr;
-		dst = dma_addr;
-		break;
-	default:
+	if (!is_slave_direction(direction)) {
 		dev_err(pch->dmac->pif.dev, "%s:%d Invalid dma direction\n",
 		__func__, __LINE__);
 		return NULL;
 	}
 
-	desc->rqcfg.brst_size = pch->burst_sz;
-	desc->rqcfg.brst_len = 1;
+	for (i = 0; i < len / period_len; i++) {
+		desc = pl330_get_desc(pch);
+		if (!desc) {
+			dev_err(pch->dmac->pif.dev, "%s:%d Unable to fetch desc\n",
+				__func__, __LINE__);
+
+			if (!first)
+				return NULL;
+
+			spin_lock_irqsave(&pdmac->pool_lock, flags);
+
+			while (!list_empty(&first->node)) {
+				desc = list_entry(first->node.next,
+						struct dma_pl330_desc, node);
+				list_move_tail(&desc->node, &pdmac->desc_pool);
+			}
+
+			list_move_tail(&first->node, &pdmac->desc_pool);
+
+			spin_unlock_irqrestore(&pdmac->pool_lock, flags);
+
+			return NULL;
+		}
+
+		switch (direction) {
+		case DMA_MEM_TO_DEV:
+			desc->rqcfg.src_inc = 1;
+			desc->rqcfg.dst_inc = 0;
+			desc->req.rqtype = MEMTODEV;
+			src = dma_addr;
+			dst = pch->fifo_addr;
+			break;
+		case DMA_DEV_TO_MEM:
+			desc->rqcfg.src_inc = 0;
+			desc->rqcfg.dst_inc = 1;
+			desc->req.rqtype = DEVTOMEM;
+			src = pch->fifo_addr;
+			dst = dma_addr;
+			break;
+		default:
+			break;
+		}
+
+		desc->rqcfg.brst_size = pch->burst_sz;
+		desc->rqcfg.brst_len = 1;
+		fill_px(&desc->px, dst, src, period_len);
+
+		if (!first)
+			first = desc;
+		else
+			list_add_tail(&desc->node, &first->node);
+
+		dma_addr += period_len;
+	}
+
+	if (!desc)
+		return NULL;
 
 	pch->cyclic = true;
-
-	fill_px(&desc->px, dst, src, period_len);
+	desc->txd.flags = flags;
 
 	return &desc->txd;
 }
diff --git a/drivers/dma/sh/shdma.c b/drivers/dma/sh/shdma.c
index b67f45f..5039fbc 100644
--- a/drivers/dma/sh/shdma.c
+++ b/drivers/dma/sh/shdma.c
@@ -400,8 +400,8 @@
 						    shdma_chan);
 	struct sh_dmae_desc *sh_desc = container_of(sdesc,
 					struct sh_dmae_desc, shdma_desc);
-	return (sh_desc->hw.tcr - sh_dmae_readl(sh_chan, TCR)) <<
-		sh_chan->xmit_shift;
+	return sh_desc->hw.tcr -
+		(sh_dmae_readl(sh_chan, TCR) << sh_chan->xmit_shift);
 }
 
 /* Called from error IRQ or NMI */
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 27e86d9..89e1090 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -48,6 +48,8 @@
  */
 static void const *edac_mc_owner;
 
+static struct bus_type mc_bus[EDAC_MAX_MCS];
+
 unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
 			         unsigned len)
 {
@@ -723,6 +725,11 @@
 	int ret = -EINVAL;
 	edac_dbg(0, "\n");
 
+	if (mci->mc_idx >= EDAC_MAX_MCS) {
+		pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx);
+		return -ENODEV;
+	}
+
 #ifdef CONFIG_EDAC_DEBUG
 	if (edac_debug_level >= 3)
 		edac_mc_dump_mci(mci);
@@ -762,6 +769,8 @@
 	/* set load time so that error rate can be tracked */
 	mci->start_time = jiffies;
 
+	mci->bus = &mc_bus[mci->mc_idx];
+
 	if (edac_create_sysfs_mci_device(mci)) {
 		edac_mc_printk(mci, KERN_WARNING,
 			"failed to create sysfs device\n");
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index ef15a7e..e7c32c4 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -370,7 +370,7 @@
 		return -ENODEV;
 
 	csrow->dev.type = &csrow_attr_type;
-	csrow->dev.bus = &mci->bus;
+	csrow->dev.bus = mci->bus;
 	device_initialize(&csrow->dev);
 	csrow->dev.parent = &mci->dev;
 	csrow->mci = mci;
@@ -605,7 +605,7 @@
 	dimm->mci = mci;
 
 	dimm->dev.type = &dimm_attr_type;
-	dimm->dev.bus = &mci->bus;
+	dimm->dev.bus = mci->bus;
 	device_initialize(&dimm->dev);
 
 	dimm->dev.parent = &mci->dev;
@@ -975,11 +975,13 @@
 	 * The memory controller needs its own bus, in order to avoid
 	 * namespace conflicts at /sys/bus/edac.
 	 */
-	mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
-	if (!mci->bus.name)
+	mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx);
+	if (!mci->bus->name)
 		return -ENOMEM;
-	edac_dbg(0, "creating bus %s\n", mci->bus.name);
-	err = bus_register(&mci->bus);
+
+	edac_dbg(0, "creating bus %s\n", mci->bus->name);
+
+	err = bus_register(mci->bus);
 	if (err < 0)
 		return err;
 
@@ -988,7 +990,7 @@
 	device_initialize(&mci->dev);
 
 	mci->dev.parent = mci_pdev;
-	mci->dev.bus = &mci->bus;
+	mci->dev.bus = mci->bus;
 	dev_set_name(&mci->dev, "mc%d", mci->mc_idx);
 	dev_set_drvdata(&mci->dev, mci);
 	pm_runtime_forbid(&mci->dev);
@@ -997,8 +999,8 @@
 	err = device_add(&mci->dev);
 	if (err < 0) {
 		edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev));
-		bus_unregister(&mci->bus);
-		kfree(mci->bus.name);
+		bus_unregister(mci->bus);
+		kfree(mci->bus->name);
 		return err;
 	}
 
@@ -1064,8 +1066,8 @@
 	}
 fail2:
 	device_unregister(&mci->dev);
-	bus_unregister(&mci->bus);
-	kfree(mci->bus.name);
+	bus_unregister(mci->bus);
+	kfree(mci->bus->name);
 	return err;
 }
 
@@ -1098,8 +1100,8 @@
 {
 	edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
 	device_unregister(&mci->dev);
-	bus_unregister(&mci->bus);
-	kfree(mci->bus.name);
+	bus_unregister(mci->bus);
+	kfree(mci->bus->name);
 }
 
 static void mc_attr_release(struct device *dev)
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index 1b63517..157b934 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -974,7 +974,7 @@
 	if (!i5100_debugfs)
 		return -ENODEV;
 
-	priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs);
+	priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs);
 
 	if (!priv->debugfs)
 		return -ENOMEM;
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 7ef316f..ac1b43a 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -54,6 +54,7 @@
 #define FW_CDEV_KERNEL_VERSION			5
 #define FW_CDEV_VERSION_EVENT_REQUEST2		4
 #define FW_CDEV_VERSION_ALLOCATE_REGION_END	4
+#define FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW	5
 
 struct client {
 	u32 version;
@@ -1005,6 +1006,8 @@
 			a->channel, a->speed, a->header_size, cb, client);
 	if (IS_ERR(context))
 		return PTR_ERR(context);
+	if (client->version < FW_CDEV_VERSION_AUTO_FLUSH_ISO_OVERFLOW)
+		context->drop_overflow_headers = true;
 
 	/* We only support one context at this time. */
 	spin_lock_irq(&client->lock);
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index 9e1db64..afb701e 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -2749,8 +2749,11 @@
 {
 	u32 *ctx_hdr;
 
-	if (ctx->header_length + ctx->base.header_size > PAGE_SIZE)
+	if (ctx->header_length + ctx->base.header_size > PAGE_SIZE) {
+		if (ctx->base.drop_overflow_headers)
+			return;
 		flush_iso_completions(ctx);
+	}
 
 	ctx_hdr = ctx->header + ctx->header_length;
 	ctx->last_timestamp = (u16)le32_to_cpu((__force __le32)dma_hdr[0]);
@@ -2910,8 +2913,11 @@
 
 	sync_it_packet_for_cpu(context, d);
 
-	if (ctx->header_length + 4 > PAGE_SIZE)
+	if (ctx->header_length + 4 > PAGE_SIZE) {
+		if (ctx->base.drop_overflow_headers)
+			return 1;
 		flush_iso_completions(ctx);
+	}
 
 	ctx_hdr = ctx->header + ctx->header_length;
 	ctx->last_timestamp = le16_to_cpu(last->res_count);
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index eb760a2..232fa8f 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -419,6 +419,13 @@
 			    dmi_get_system_info(DMI_BIOS_DATE));
 }
 
+/*
+ * Check for DMI/SMBIOS headers in the system firmware image.  Any
+ * SMBIOS header must start 16 bytes before the DMI header, so take a
+ * 32 byte buffer and check for DMI at offset 16 and SMBIOS at offset
+ * 0.  If the DMI header is present, set dmi_ver accordingly (SMBIOS
+ * takes precedence) and return 0.  Otherwise return 1.
+ */
 static int __init dmi_present(const u8 *buf)
 {
 	int smbios_ver;
@@ -506,6 +513,13 @@
 		if (p == NULL)
 			goto error;
 
+		/*
+		 * Iterate over all possible DMI header addresses q.
+		 * Maintain the 32 bytes around q in buf.  On the
+		 * first iteration, substitute zero for the
+		 * out-of-range bytes so there is no chance of falsely
+		 * detecting an SMBIOS header.
+		 */
 		memset(buf, 0, 16);
 		for (q = p; q < p + 0x10000; q += 16) {
 			memcpy_fromio(buf + 16, q, 16);
diff --git a/drivers/gpio/gpio-msm-v1.c b/drivers/gpio/gpio-msm-v1.c
index e3ceaac..73b7396 100644
--- a/drivers/gpio/gpio-msm-v1.c
+++ b/drivers/gpio/gpio-msm-v1.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/device.h>
 #include <linux/platform_device.h>
+#include <linux/err.h>
 
 #include <mach/msm_gpiomux.h>
 
diff --git a/drivers/gpio/gpio-msm-v2.c b/drivers/gpio/gpio-msm-v2.c
index f4491a4..c2fa770 100644
--- a/drivers/gpio/gpio-msm-v2.c
+++ b/drivers/gpio/gpio-msm-v2.c
@@ -378,7 +378,7 @@
 	int ret, ngpio;
 	struct resource *res;
 
-	if (!of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
+	if (of_property_read_u32(pdev->dev.of_node, "ngpio", &ngpio)) {
 		dev_err(&pdev->dev, "%s: ngpio property missing\n", __func__);
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index a7c54c8..955555d 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -6,7 +6,7 @@
 #
 menuconfig DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
-	depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && MMU
+	depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && MMU && HAS_DMA
 	select HDMI
 	select I2C
 	select I2C_ALGOBIT
@@ -168,6 +168,17 @@
 	  the driver to bind to PCI devices, which precludes loading things
 	  like intelfb.
 
+config DRM_I915_PRELIMINARY_HW_SUPPORT
+	bool "Enable preliminary support for prerelease Intel hardware by default"
+	depends on DRM_I915
+	help
+	  Choose this option if you have prerelease Intel hardware and want the
+	  i915 driver to support it by default.  You can enable such support at
+	  runtime with the module option i915.preliminary_hw_support=1; this
+	  option changes the default for that module option.
+
+	  If in doubt, say "N".
+
 config DRM_MGA
 	tristate "Matrox g200/g400"
 	depends on DRM && PCI
@@ -223,3 +234,5 @@
 source "drivers/gpu/drm/tilcdc/Kconfig"
 
 source "drivers/gpu/drm/qxl/Kconfig"
+
+source "drivers/gpu/drm/msm/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 801bcaf..f089adf 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -7,13 +7,13 @@
 drm-y       :=	drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \
 		drm_context.o drm_dma.o \
 		drm_drv.o drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \
-		drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \
+		drm_lock.o drm_memory.o drm_stub.o drm_vm.o \
 		drm_agpsupport.o drm_scatter.o drm_pci.o \
 		drm_platform.o drm_sysfs.o drm_hashtab.o drm_mm.o \
 		drm_crtc.o drm_modes.o drm_edid.o \
 		drm_info.o drm_debugfs.o drm_encoder_slave.o \
 		drm_trace_points.o drm_global.o drm_prime.o \
-		drm_rect.o
+		drm_rect.o drm_vma_manager.o drm_flip_work.o
 
 drm-$(CONFIG_COMPAT) += drm_ioc32.o
 drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o
@@ -54,4 +54,5 @@
 obj-$(CONFIG_DRM_OMAP)	+= omapdrm/
 obj-$(CONFIG_DRM_TILCDC)	+= tilcdc/
 obj-$(CONFIG_DRM_QXL) += qxl/
+obj-$(CONFIG_DRM_MSM) += msm/
 obj-y			+= i2c/
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index df0d0a0..32e270d 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -190,7 +190,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = ast_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -198,7 +197,7 @@
 };
 
 static struct drm_driver driver = {
-	.driver_features = DRIVER_USE_MTRR | DRIVER_MODESET | DRIVER_GEM,
+	.driver_features = DRIVER_MODESET | DRIVER_GEM,
 	.dev_priv_size = 0,
 
 	.load = ast_driver_load,
@@ -216,7 +215,7 @@
 	.gem_free_object = ast_gem_free_object,
 	.dumb_create = ast_dumb_create,
 	.dumb_map_offset = ast_dumb_mmap_offset,
-	.dumb_destroy = ast_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 
 };
 
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 622d4ae..796dbb2 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -322,9 +322,6 @@
 extern int ast_dumb_create(struct drm_file *file,
 			   struct drm_device *dev,
 			   struct drm_mode_create_dumb *args);
-extern int ast_dumb_destroy(struct drm_file *file,
-			    struct drm_device *dev,
-			    uint32_t handle);
 
 extern int ast_gem_init_object(struct drm_gem_object *obj);
 extern void ast_gem_free_object(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index f60fd7b..7f6152d 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -449,13 +449,6 @@
 	return 0;
 }
 
-int ast_dumb_destroy(struct drm_file *file,
-		     struct drm_device *dev,
-		     uint32_t handle)
-{
-	return drm_gem_handle_delete(file, handle);
-}
-
 int ast_gem_init_object(struct drm_gem_object *obj)
 {
 	BUG();
@@ -487,7 +480,7 @@
 
 static inline u64 ast_bo_mmap_offset(struct ast_bo *bo)
 {
-	return bo->bo.addr_space_offset;
+	return drm_vma_node_offset_addr(&bo->bo.vma_node);
 }
 int
 ast_dumb_mmap_offset(struct drm_file *file,
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 98d6708..32aecb3 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -148,7 +148,9 @@
 
 static int ast_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	return 0;
+	struct ast_bo *astbo = ast_bo(bo);
+
+	return drm_vma_node_verify_access(&astbo->gem.vma_node, filp);
 }
 
 static int ast_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
@@ -321,8 +323,8 @@
 		return ret;
 	}
 
-	astbo->gem.driver_private = NULL;
 	astbo->bo.bdev = &ast->ttm.bdev;
+	astbo->bo.bdev->dev_mapping = dev->dev_mapping;
 
 	ast_ttm_placement(astbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
 
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c
index 8ecb601..138364d 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.c
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
@@ -85,10 +85,9 @@
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
-	.fasync = drm_fasync,
 };
 static struct drm_driver driver = {
-	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_USE_MTRR,
+	.driver_features = DRIVER_MODESET | DRIVER_GEM,
 	.load = cirrus_driver_load,
 	.unload = cirrus_driver_unload,
 	.fops = &cirrus_driver_fops,
@@ -102,7 +101,7 @@
 	.gem_free_object = cirrus_gem_free_object,
 	.dumb_create = cirrus_dumb_create,
 	.dumb_map_offset = cirrus_dumb_mmap_offset,
-	.dumb_destroy = cirrus_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 };
 
 static struct pci_driver cirrus_pci_driver = {
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h
index bae5560..9b0bb91 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.h
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.h
@@ -203,9 +203,6 @@
 int cirrus_dumb_create(struct drm_file *file,
 		    struct drm_device *dev,
 		       struct drm_mode_create_dumb *args);
-int cirrus_dumb_destroy(struct drm_file *file,
-		     struct drm_device *dev,
-			uint32_t handle);
 
 int cirrus_framebuffer_init(struct drm_device *dev,
 			   struct cirrus_framebuffer *gfb,
diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c
index 35cbae8..f130a53 100644
--- a/drivers/gpu/drm/cirrus/cirrus_main.c
+++ b/drivers/gpu/drm/cirrus/cirrus_main.c
@@ -255,13 +255,6 @@
 	return 0;
 }
 
-int cirrus_dumb_destroy(struct drm_file *file,
-		     struct drm_device *dev,
-		     uint32_t handle)
-{
-	return drm_gem_handle_delete(file, handle);
-}
-
 int cirrus_gem_init_object(struct drm_gem_object *obj)
 {
 	BUG();
@@ -294,7 +287,7 @@
 
 static inline u64 cirrus_bo_mmap_offset(struct cirrus_bo *bo)
 {
-	return bo->bo.addr_space_offset;
+	return drm_vma_node_offset_addr(&bo->bo.vma_node);
 }
 
 int
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index 0047012..75becde 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -148,7 +148,9 @@
 
 static int cirrus_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	return 0;
+	struct cirrus_bo *cirrusbo = cirrus_bo(bo);
+
+	return drm_vma_node_verify_access(&cirrusbo->gem.vma_node, filp);
 }
 
 static int cirrus_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
@@ -326,8 +328,8 @@
 		return ret;
 	}
 
-	cirrusbo->gem.driver_private = NULL;
 	cirrusbo->bo.bdev = &cirrus->ttm.bdev;
+	cirrusbo->bo.bdev->dev_mapping = dev->dev_mapping;
 
 	cirrus_ttm_placement(cirrusbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
 
diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c
index 3d8fed1..e301d65 100644
--- a/drivers/gpu/drm/drm_agpsupport.c
+++ b/drivers/gpu/drm/drm_agpsupport.c
@@ -424,6 +424,57 @@
 }
 
 /**
+ * drm_agp_clear - Clear AGP resource list
+ * @dev: DRM device
+ *
+ * Iterate over all AGP resources and remove them. But keep the AGP head
+ * intact so it can still be used. It is safe to call this if AGP is disabled or
+ * was already removed.
+ *
+ * If DRIVER_MODESET is active, nothing is done to protect the modesetting
+ * resources from getting destroyed. Drivers are responsible of cleaning them up
+ * during device shutdown.
+ */
+void drm_agp_clear(struct drm_device *dev)
+{
+	struct drm_agp_mem *entry, *tempe;
+
+	if (!drm_core_has_AGP(dev) || !dev->agp)
+		return;
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return;
+
+	list_for_each_entry_safe(entry, tempe, &dev->agp->memory, head) {
+		if (entry->bound)
+			drm_unbind_agp(entry->memory);
+		drm_free_agp(entry->memory, entry->pages);
+		kfree(entry);
+	}
+	INIT_LIST_HEAD(&dev->agp->memory);
+
+	if (dev->agp->acquired)
+		drm_agp_release(dev);
+
+	dev->agp->acquired = 0;
+	dev->agp->enabled = 0;
+}
+
+/**
+ * drm_agp_destroy - Destroy AGP head
+ * @dev: DRM device
+ *
+ * Destroy resources that were previously allocated via drm_agp_initp. Caller
+ * must ensure to clean up all AGP resources before calling this. See
+ * drm_agp_clear().
+ *
+ * Call this to destroy AGP heads allocated via drm_agp_init().
+ */
+void drm_agp_destroy(struct drm_agp_head *agp)
+{
+	kfree(agp);
+}
+
+/**
  * Binds a collection of pages into AGP memory at the given offset, returning
  * the AGP memory structure containing them.
  *
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 5a4dbb4..471e051d 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -207,12 +207,10 @@
 			return 0;
 		}
 
-		if (drm_core_has_MTRR(dev)) {
-			if (map->type == _DRM_FRAME_BUFFER ||
-			    (map->flags & _DRM_WRITE_COMBINING)) {
-				map->mtrr =
-					arch_phys_wc_add(map->offset, map->size);
-			}
+		if (map->type == _DRM_FRAME_BUFFER ||
+		    (map->flags & _DRM_WRITE_COMBINING)) {
+			map->mtrr =
+				arch_phys_wc_add(map->offset, map->size);
 		}
 		if (map->type == _DRM_REGISTERS) {
 			if (map->flags & _DRM_WRITE_COMBINING)
@@ -243,7 +241,7 @@
 		}
 		map->handle = vmalloc_user(map->size);
 		DRM_DEBUG("%lu %d %p\n",
-			  map->size, drm_order(map->size), map->handle);
+			  map->size, order_base_2(map->size), map->handle);
 		if (!map->handle) {
 			kfree(map);
 			return -ENOMEM;
@@ -464,8 +462,7 @@
 		iounmap(map->handle);
 		/* FALLTHROUGH */
 	case _DRM_FRAME_BUFFER:
-		if (drm_core_has_MTRR(dev))
-			arch_phys_wc_del(map->mtrr);
+		arch_phys_wc_del(map->mtrr);
 		break;
 	case _DRM_SHM:
 		vfree(map->handle);
@@ -630,7 +627,7 @@
 		return -EINVAL;
 
 	count = request->count;
-	order = drm_order(request->size);
+	order = order_base_2(request->size);
 	size = 1 << order;
 
 	alignment = (request->flags & _DRM_PAGE_ALIGN)
@@ -800,7 +797,7 @@
 		return -EPERM;
 
 	count = request->count;
-	order = drm_order(request->size);
+	order = order_base_2(request->size);
 	size = 1 << order;
 
 	DRM_DEBUG("count=%d, size=%d (%d), order=%d\n",
@@ -1002,7 +999,7 @@
 		return -EPERM;
 
 	count = request->count;
-	order = drm_order(request->size);
+	order = order_base_2(request->size);
 	size = 1 << order;
 
 	alignment = (request->flags & _DRM_PAGE_ALIGN)
@@ -1130,161 +1127,6 @@
 	return 0;
 }
 
-static int drm_addbufs_fb(struct drm_device * dev, struct drm_buf_desc * request)
-{
-	struct drm_device_dma *dma = dev->dma;
-	struct drm_buf_entry *entry;
-	struct drm_buf *buf;
-	unsigned long offset;
-	unsigned long agp_offset;
-	int count;
-	int order;
-	int size;
-	int alignment;
-	int page_order;
-	int total;
-	int byte_count;
-	int i;
-	struct drm_buf **temp_buflist;
-
-	if (!drm_core_check_feature(dev, DRIVER_FB_DMA))
-		return -EINVAL;
-
-	if (!dma)
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	count = request->count;
-	order = drm_order(request->size);
-	size = 1 << order;
-
-	alignment = (request->flags & _DRM_PAGE_ALIGN)
-	    ? PAGE_ALIGN(size) : size;
-	page_order = order - PAGE_SHIFT > 0 ? order - PAGE_SHIFT : 0;
-	total = PAGE_SIZE << page_order;
-
-	byte_count = 0;
-	agp_offset = request->agp_start;
-
-	DRM_DEBUG("count:      %d\n", count);
-	DRM_DEBUG("order:      %d\n", order);
-	DRM_DEBUG("size:       %d\n", size);
-	DRM_DEBUG("agp_offset: %lu\n", agp_offset);
-	DRM_DEBUG("alignment:  %d\n", alignment);
-	DRM_DEBUG("page_order: %d\n", page_order);
-	DRM_DEBUG("total:      %d\n", total);
-
-	if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER)
-		return -EINVAL;
-
-	spin_lock(&dev->count_lock);
-	if (dev->buf_use) {
-		spin_unlock(&dev->count_lock);
-		return -EBUSY;
-	}
-	atomic_inc(&dev->buf_alloc);
-	spin_unlock(&dev->count_lock);
-
-	mutex_lock(&dev->struct_mutex);
-	entry = &dma->bufs[order];
-	if (entry->buf_count) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;	/* May only call once for each order */
-	}
-
-	if (count < 0 || count > 4096) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -EINVAL;
-	}
-
-	entry->buflist = kzalloc(count * sizeof(*entry->buflist),
-				GFP_KERNEL);
-	if (!entry->buflist) {
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-
-	entry->buf_size = size;
-	entry->page_order = page_order;
-
-	offset = 0;
-
-	while (entry->buf_count < count) {
-		buf = &entry->buflist[entry->buf_count];
-		buf->idx = dma->buf_count + entry->buf_count;
-		buf->total = alignment;
-		buf->order = order;
-		buf->used = 0;
-
-		buf->offset = (dma->byte_count + offset);
-		buf->bus_address = agp_offset + offset;
-		buf->address = (void *)(agp_offset + offset);
-		buf->next = NULL;
-		buf->waiting = 0;
-		buf->pending = 0;
-		buf->file_priv = NULL;
-
-		buf->dev_priv_size = dev->driver->dev_priv_size;
-		buf->dev_private = kzalloc(buf->dev_priv_size, GFP_KERNEL);
-		if (!buf->dev_private) {
-			/* Set count correctly so we free the proper amount. */
-			entry->buf_count = count;
-			drm_cleanup_buf_error(dev, entry);
-			mutex_unlock(&dev->struct_mutex);
-			atomic_dec(&dev->buf_alloc);
-			return -ENOMEM;
-		}
-
-		DRM_DEBUG("buffer %d @ %p\n", entry->buf_count, buf->address);
-
-		offset += alignment;
-		entry->buf_count++;
-		byte_count += PAGE_SIZE << page_order;
-	}
-
-	DRM_DEBUG("byte_count: %d\n", byte_count);
-
-	temp_buflist = krealloc(dma->buflist,
-				(dma->buf_count + entry->buf_count) *
-				sizeof(*dma->buflist), GFP_KERNEL);
-	if (!temp_buflist) {
-		/* Free the entry because it isn't valid */
-		drm_cleanup_buf_error(dev, entry);
-		mutex_unlock(&dev->struct_mutex);
-		atomic_dec(&dev->buf_alloc);
-		return -ENOMEM;
-	}
-	dma->buflist = temp_buflist;
-
-	for (i = 0; i < entry->buf_count; i++) {
-		dma->buflist[i + dma->buf_count] = &entry->buflist[i];
-	}
-
-	dma->buf_count += entry->buf_count;
-	dma->seg_count += entry->seg_count;
-	dma->page_count += byte_count >> PAGE_SHIFT;
-	dma->byte_count += byte_count;
-
-	DRM_DEBUG("dma->buf_count : %d\n", dma->buf_count);
-	DRM_DEBUG("entry->buf_count : %d\n", entry->buf_count);
-
-	mutex_unlock(&dev->struct_mutex);
-
-	request->count = entry->buf_count;
-	request->size = size;
-
-	dma->flags = _DRM_DMA_USE_FB;
-
-	atomic_dec(&dev->buf_alloc);
-	return 0;
-}
-
-
 /**
  * Add buffers for DMA transfers (ioctl).
  *
@@ -1305,6 +1147,9 @@
 	struct drm_buf_desc *request = data;
 	int ret;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
 		return -EINVAL;
 
@@ -1316,7 +1161,7 @@
 	if (request->flags & _DRM_SG_BUFFER)
 		ret = drm_addbufs_sg(dev, request);
 	else if (request->flags & _DRM_FB_BUFFER)
-		ret = drm_addbufs_fb(dev, request);
+		ret = -EINVAL;
 	else
 		ret = drm_addbufs_pci(dev, request);
 
@@ -1348,6 +1193,9 @@
 	int i;
 	int count;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
 		return -EINVAL;
 
@@ -1427,6 +1275,9 @@
 	int order;
 	struct drm_buf_entry *entry;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
 		return -EINVAL;
 
@@ -1435,7 +1286,7 @@
 
 	DRM_DEBUG("%d, %d, %d\n",
 		  request->size, request->low_mark, request->high_mark);
-	order = drm_order(request->size);
+	order = order_base_2(request->size);
 	if (order < DRM_MIN_ORDER || order > DRM_MAX_ORDER)
 		return -EINVAL;
 	entry = &dma->bufs[order];
@@ -1472,6 +1323,9 @@
 	int idx;
 	struct drm_buf *buf;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
 		return -EINVAL;
 
@@ -1524,6 +1378,9 @@
 	struct drm_buf_map *request = data;
 	int i;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA))
 		return -EINVAL;
 
@@ -1541,9 +1398,7 @@
 	if (request->count >= dma->buf_count) {
 		if ((drm_core_has_AGP(dev) && (dma->flags & _DRM_DMA_USE_AGP))
 		    || (drm_core_check_feature(dev, DRIVER_SG)
-			&& (dma->flags & _DRM_DMA_USE_SG))
-		    || (drm_core_check_feature(dev, DRIVER_FB_DMA)
-			&& (dma->flags & _DRM_DMA_USE_FB))) {
+			&& (dma->flags & _DRM_DMA_USE_SG))) {
 			struct drm_local_map *map = dev->agp_buffer_map;
 			unsigned long token = dev->agp_buffer_token;
 
@@ -1600,25 +1455,28 @@
 	return retcode;
 }
 
-/**
- * Compute size order.  Returns the exponent of the smaller power of two which
- * is greater or equal to given number.
- *
- * \param size size.
- * \return order.
- *
- * \todo Can be made faster.
- */
-int drm_order(unsigned long size)
+int drm_dma_ioctl(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
 {
-	int order;
-	unsigned long tmp;
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
 
-	for (order = 0, tmp = size >> 1; tmp; tmp >>= 1, order++) ;
-
-	if (size & (size - 1))
-		++order;
-
-	return order;
+	if (dev->driver->dma_ioctl)
+		return dev->driver->dma_ioctl(dev, data, file_priv);
+	else
+		return -EINVAL;
 }
-EXPORT_SYMBOL(drm_order);
+
+struct drm_local_map *drm_getsarea(struct drm_device *dev)
+{
+	struct drm_map_list *entry;
+
+	list_for_each_entry(entry, &dev->maplist, head) {
+		if (entry->map && entry->map->type == _DRM_SHM &&
+		    (entry->map->flags & _DRM_CONTAINS_LOCK)) {
+			return entry->map;
+		}
+	}
+	return NULL;
+}
+EXPORT_SYMBOL(drm_getsarea);
diff --git a/drivers/gpu/drm/drm_context.c b/drivers/gpu/drm/drm_context.c
index 725968d..b4fb86d 100644
--- a/drivers/gpu/drm/drm_context.c
+++ b/drivers/gpu/drm/drm_context.c
@@ -42,10 +42,6 @@
 
 #include <drm/drmP.h>
 
-/******************************************************************/
-/** \name Context bitmap support */
-/*@{*/
-
 /**
  * Free a handle from the context bitmap.
  *
@@ -56,13 +52,48 @@
  * in drm_device::ctx_idr, while holding the drm_device::struct_mutex
  * lock.
  */
-void drm_ctxbitmap_free(struct drm_device * dev, int ctx_handle)
+static void drm_ctxbitmap_free(struct drm_device * dev, int ctx_handle)
 {
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return;
+
 	mutex_lock(&dev->struct_mutex);
 	idr_remove(&dev->ctx_idr, ctx_handle);
 	mutex_unlock(&dev->struct_mutex);
 }
 
+/******************************************************************/
+/** \name Context bitmap support */
+/*@{*/
+
+void drm_legacy_ctxbitmap_release(struct drm_device *dev,
+				  struct drm_file *file_priv)
+{
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return;
+
+	mutex_lock(&dev->ctxlist_mutex);
+	if (!list_empty(&dev->ctxlist)) {
+		struct drm_ctx_list *pos, *n;
+
+		list_for_each_entry_safe(pos, n, &dev->ctxlist, head) {
+			if (pos->tag == file_priv &&
+			    pos->handle != DRM_KERNEL_CONTEXT) {
+				if (dev->driver->context_dtor)
+					dev->driver->context_dtor(dev,
+								  pos->handle);
+
+				drm_ctxbitmap_free(dev, pos->handle);
+
+				list_del(&pos->head);
+				kfree(pos);
+				--dev->ctx_count;
+			}
+		}
+	}
+	mutex_unlock(&dev->ctxlist_mutex);
+}
+
 /**
  * Context bitmap allocation.
  *
@@ -90,10 +121,12 @@
  *
  * Initialise the drm_device::ctx_idr
  */
-int drm_ctxbitmap_init(struct drm_device * dev)
+void drm_legacy_ctxbitmap_init(struct drm_device * dev)
 {
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return;
+
 	idr_init(&dev->ctx_idr);
-	return 0;
 }
 
 /**
@@ -104,7 +137,7 @@
  * Free all idr members using drm_ctx_sarea_free helper function
  * while holding the drm_device::struct_mutex lock.
  */
-void drm_ctxbitmap_cleanup(struct drm_device * dev)
+void drm_legacy_ctxbitmap_cleanup(struct drm_device * dev)
 {
 	mutex_lock(&dev->struct_mutex);
 	idr_destroy(&dev->ctx_idr);
@@ -136,6 +169,9 @@
 	struct drm_local_map *map;
 	struct drm_map_list *_entry;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->struct_mutex);
 
 	map = idr_find(&dev->ctx_idr, request->ctx_id);
@@ -180,6 +216,9 @@
 	struct drm_local_map *map = NULL;
 	struct drm_map_list *r_list = NULL;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	mutex_lock(&dev->struct_mutex);
 	list_for_each_entry(r_list, &dev->maplist, head) {
 		if (r_list->map
@@ -251,7 +290,6 @@
 				       struct drm_file *file_priv, int new)
 {
 	dev->last_context = new;	/* PRE/POST: This is the _only_ writer. */
-	dev->last_switch = jiffies;
 
 	if (!_DRM_LOCK_IS_HELD(file_priv->master->lock.hw_lock->lock)) {
 		DRM_ERROR("Lock isn't held after context switch\n");
@@ -261,7 +299,6 @@
 	   when the kernel holds the lock, release
 	   that lock here. */
 	clear_bit(0, &dev->context_flag);
-	wake_up(&dev->context_wait);
 
 	return 0;
 }
@@ -282,6 +319,9 @@
 	struct drm_ctx ctx;
 	int i;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (res->count >= DRM_RESERVED_CONTEXTS) {
 		memset(&ctx, 0, sizeof(ctx));
 		for (i = 0; i < DRM_RESERVED_CONTEXTS; i++) {
@@ -312,6 +352,9 @@
 	struct drm_ctx_list *ctx_entry;
 	struct drm_ctx *ctx = data;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	ctx->handle = drm_ctxbitmap_next(dev);
 	if (ctx->handle == DRM_KERNEL_CONTEXT) {
 		/* Skip kernel's context and get a new one. */
@@ -342,12 +385,6 @@
 	return 0;
 }
 
-int drm_modctx(struct drm_device *dev, void *data, struct drm_file *file_priv)
-{
-	/* This does nothing */
-	return 0;
-}
-
 /**
  * Get context.
  *
@@ -361,6 +398,9 @@
 {
 	struct drm_ctx *ctx = data;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	/* This is 0, because we don't handle any context flags */
 	ctx->flags = 0;
 
@@ -383,6 +423,9 @@
 {
 	struct drm_ctx *ctx = data;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	DRM_DEBUG("%d\n", ctx->handle);
 	return drm_context_switch(dev, dev->last_context, ctx->handle);
 }
@@ -403,6 +446,9 @@
 {
 	struct drm_ctx *ctx = data;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	DRM_DEBUG("%d\n", ctx->handle);
 	drm_context_switch_complete(dev, file_priv, ctx->handle);
 
@@ -425,6 +471,9 @@
 {
 	struct drm_ctx *ctx = data;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	DRM_DEBUG("%d\n", ctx->handle);
 	if (ctx->handle != DRM_KERNEL_CONTEXT) {
 		if (dev->driver->context_dtor)
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index fc83bb9..bff2fa9 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -125,13 +125,6 @@
 	{ DRM_MODE_SCALE_ASPECT, "Full aspect" },
 };
 
-static const struct drm_prop_enum_list drm_dithering_mode_enum_list[] =
-{
-	{ DRM_MODE_DITHERING_OFF, "Off" },
-	{ DRM_MODE_DITHERING_ON, "On" },
-	{ DRM_MODE_DITHERING_AUTO, "Automatic" },
-};
-
 /*
  * Non-global properties, but "required" for certain connectors.
  */
@@ -186,29 +179,29 @@
 struct drm_conn_prop_enum_list {
 	int type;
 	const char *name;
-	int count;
+	struct ida ida;
 };
 
 /*
  * Connector and encoder types.
  */
 static struct drm_conn_prop_enum_list drm_connector_enum_list[] =
-{	{ DRM_MODE_CONNECTOR_Unknown, "Unknown", 0 },
-	{ DRM_MODE_CONNECTOR_VGA, "VGA", 0 },
-	{ DRM_MODE_CONNECTOR_DVII, "DVI-I", 0 },
-	{ DRM_MODE_CONNECTOR_DVID, "DVI-D", 0 },
-	{ DRM_MODE_CONNECTOR_DVIA, "DVI-A", 0 },
-	{ DRM_MODE_CONNECTOR_Composite, "Composite", 0 },
-	{ DRM_MODE_CONNECTOR_SVIDEO, "SVIDEO", 0 },
-	{ DRM_MODE_CONNECTOR_LVDS, "LVDS", 0 },
-	{ DRM_MODE_CONNECTOR_Component, "Component", 0 },
-	{ DRM_MODE_CONNECTOR_9PinDIN, "DIN", 0 },
-	{ DRM_MODE_CONNECTOR_DisplayPort, "DP", 0 },
-	{ DRM_MODE_CONNECTOR_HDMIA, "HDMI-A", 0 },
-	{ DRM_MODE_CONNECTOR_HDMIB, "HDMI-B", 0 },
-	{ DRM_MODE_CONNECTOR_TV, "TV", 0 },
-	{ DRM_MODE_CONNECTOR_eDP, "eDP", 0 },
-	{ DRM_MODE_CONNECTOR_VIRTUAL, "Virtual", 0},
+{	{ DRM_MODE_CONNECTOR_Unknown, "Unknown" },
+	{ DRM_MODE_CONNECTOR_VGA, "VGA" },
+	{ DRM_MODE_CONNECTOR_DVII, "DVI-I" },
+	{ DRM_MODE_CONNECTOR_DVID, "DVI-D" },
+	{ DRM_MODE_CONNECTOR_DVIA, "DVI-A" },
+	{ DRM_MODE_CONNECTOR_Composite, "Composite" },
+	{ DRM_MODE_CONNECTOR_SVIDEO, "SVIDEO" },
+	{ DRM_MODE_CONNECTOR_LVDS, "LVDS" },
+	{ DRM_MODE_CONNECTOR_Component, "Component" },
+	{ DRM_MODE_CONNECTOR_9PinDIN, "DIN" },
+	{ DRM_MODE_CONNECTOR_DisplayPort, "DP" },
+	{ DRM_MODE_CONNECTOR_HDMIA, "HDMI-A" },
+	{ DRM_MODE_CONNECTOR_HDMIB, "HDMI-B" },
+	{ DRM_MODE_CONNECTOR_TV, "TV" },
+	{ DRM_MODE_CONNECTOR_eDP, "eDP" },
+	{ DRM_MODE_CONNECTOR_VIRTUAL, "Virtual" },
 };
 
 static const struct drm_prop_enum_list drm_encoder_enum_list[] =
@@ -220,6 +213,22 @@
 	{ DRM_MODE_ENCODER_VIRTUAL, "Virtual" },
 };
 
+void drm_connector_ida_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(drm_connector_enum_list); i++)
+		ida_init(&drm_connector_enum_list[i].ida);
+}
+
+void drm_connector_ida_destroy(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(drm_connector_enum_list); i++)
+		ida_destroy(&drm_connector_enum_list[i].ida);
+}
+
 const char *drm_get_encoder_name(const struct drm_encoder *encoder)
 {
 	static char buf[32];
@@ -677,20 +686,19 @@
 }
 EXPORT_SYMBOL(drm_mode_probed_add);
 
-/**
+/*
  * drm_mode_remove - remove and free a mode
  * @connector: connector list to modify
  * @mode: mode to remove
  *
  * Remove @mode from @connector's mode list, then free it.
  */
-void drm_mode_remove(struct drm_connector *connector,
-		     struct drm_display_mode *mode)
+static void drm_mode_remove(struct drm_connector *connector,
+			    struct drm_display_mode *mode)
 {
 	list_del(&mode->head);
 	drm_mode_destroy(connector->dev, mode);
 }
-EXPORT_SYMBOL(drm_mode_remove);
 
 /**
  * drm_connector_init - Init a preallocated connector
@@ -711,6 +719,8 @@
 		       int connector_type)
 {
 	int ret;
+	struct ida *connector_ida =
+		&drm_connector_enum_list[connector_type].ida;
 
 	drm_modeset_lock_all(dev);
 
@@ -723,7 +733,12 @@
 	connector->funcs = funcs;
 	connector->connector_type = connector_type;
 	connector->connector_type_id =
-		++drm_connector_enum_list[connector_type].count; /* TODO */
+		ida_simple_get(connector_ida, 1, 0, GFP_KERNEL);
+	if (connector->connector_type_id < 0) {
+		ret = connector->connector_type_id;
+		drm_mode_object_put(dev, &connector->base);
+		goto out;
+	}
 	INIT_LIST_HEAD(&connector->probed_modes);
 	INIT_LIST_HEAD(&connector->modes);
 	connector->edid_blob_ptr = NULL;
@@ -764,6 +779,9 @@
 	list_for_each_entry_safe(mode, t, &connector->modes, head)
 		drm_mode_remove(connector, mode);
 
+	ida_remove(&drm_connector_enum_list[connector->connector_type].ida,
+		   connector->connector_type_id);
+
 	drm_mode_object_put(dev, &connector->base);
 	list_del(&connector->head);
 	dev->mode_config.num_connector--;
@@ -781,6 +799,41 @@
 }
 EXPORT_SYMBOL(drm_connector_unplug_all);
 
+int drm_bridge_init(struct drm_device *dev, struct drm_bridge *bridge,
+		const struct drm_bridge_funcs *funcs)
+{
+	int ret;
+
+	drm_modeset_lock_all(dev);
+
+	ret = drm_mode_object_get(dev, &bridge->base, DRM_MODE_OBJECT_BRIDGE);
+	if (ret)
+		goto out;
+
+	bridge->dev = dev;
+	bridge->funcs = funcs;
+
+	list_add_tail(&bridge->head, &dev->mode_config.bridge_list);
+	dev->mode_config.num_bridge++;
+
+ out:
+	drm_modeset_unlock_all(dev);
+	return ret;
+}
+EXPORT_SYMBOL(drm_bridge_init);
+
+void drm_bridge_cleanup(struct drm_bridge *bridge)
+{
+	struct drm_device *dev = bridge->dev;
+
+	drm_modeset_lock_all(dev);
+	drm_mode_object_put(dev, &bridge->base);
+	list_del(&bridge->head);
+	dev->mode_config.num_bridge--;
+	drm_modeset_unlock_all(dev);
+}
+EXPORT_SYMBOL(drm_bridge_cleanup);
+
 int drm_encoder_init(struct drm_device *dev,
 		      struct drm_encoder *encoder,
 		      const struct drm_encoder_funcs *funcs,
@@ -1135,30 +1188,6 @@
 EXPORT_SYMBOL(drm_mode_create_scaling_mode_property);
 
 /**
- * drm_mode_create_dithering_property - create dithering property
- * @dev: DRM device
- *
- * Called by a driver the first time it's needed, must be attached to desired
- * connectors.
- */
-int drm_mode_create_dithering_property(struct drm_device *dev)
-{
-	struct drm_property *dithering_mode;
-
-	if (dev->mode_config.dithering_mode_property)
-		return 0;
-
-	dithering_mode =
-		drm_property_create_enum(dev, 0, "dithering",
-				drm_dithering_mode_enum_list,
-				    ARRAY_SIZE(drm_dithering_mode_enum_list));
-	dev->mode_config.dithering_mode_property = dithering_mode;
-
-	return 0;
-}
-EXPORT_SYMBOL(drm_mode_create_dithering_property);
-
-/**
  * drm_mode_create_dirty_property - create dirty property
  * @dev: DRM device
  *
@@ -1190,6 +1219,7 @@
 	total_objects += dev->mode_config.num_crtc;
 	total_objects += dev->mode_config.num_connector;
 	total_objects += dev->mode_config.num_encoder;
+	total_objects += dev->mode_config.num_bridge;
 
 	group->id_list = kzalloc(total_objects * sizeof(uint32_t), GFP_KERNEL);
 	if (!group->id_list)
@@ -1198,6 +1228,7 @@
 	group->num_crtcs = 0;
 	group->num_connectors = 0;
 	group->num_encoders = 0;
+	group->num_bridges = 0;
 	return 0;
 }
 
@@ -1207,6 +1238,7 @@
 	struct drm_crtc *crtc;
 	struct drm_encoder *encoder;
 	struct drm_connector *connector;
+	struct drm_bridge *bridge;
 	int ret;
 
 	if ((ret = drm_mode_group_init(dev, group)))
@@ -1223,6 +1255,11 @@
 		group->id_list[group->num_crtcs + group->num_encoders +
 			       group->num_connectors++] = connector->base.id;
 
+	list_for_each_entry(bridge, &dev->mode_config.bridge_list, head)
+		group->id_list[group->num_crtcs + group->num_encoders +
+			       group->num_connectors + group->num_bridges++] =
+					bridge->base.id;
+
 	return 0;
 }
 EXPORT_SYMBOL(drm_mode_group_init_legacy_group);
@@ -2604,10 +2641,22 @@
 	r->depth = fb->depth;
 	r->bpp = fb->bits_per_pixel;
 	r->pitch = fb->pitches[0];
-	if (fb->funcs->create_handle)
-		ret = fb->funcs->create_handle(fb, file_priv, &r->handle);
-	else
+	if (fb->funcs->create_handle) {
+		if (file_priv->is_master || capable(CAP_SYS_ADMIN)) {
+			ret = fb->funcs->create_handle(fb, file_priv,
+						       &r->handle);
+		} else {
+			/* GET_FB() is an unprivileged ioctl so we must not
+			 * return a buffer-handle to non-master processes! For
+			 * backwards-compatibility reasons, we cannot make
+			 * GET_FB() privileged, so just return an invalid handle
+			 * for non-masters. */
+			r->handle = 0;
+			ret = 0;
+		}
+	} else {
 		ret = -ENODEV;
+	}
 
 	drm_framebuffer_unreference(fb);
 
@@ -3514,6 +3563,9 @@
 	    page_flip->reserved != 0)
 		return -EINVAL;
 
+	if ((page_flip->flags & DRM_MODE_PAGE_FLIP_ASYNC) && !dev->mode_config.async_page_flip)
+		return -EINVAL;
+
 	obj = drm_mode_object_find(dev, page_flip->crtc_id, DRM_MODE_OBJECT_CRTC);
 	if (!obj)
 		return -EINVAL;
@@ -3587,7 +3639,7 @@
 	}
 
 	old_fb = crtc->fb;
-	ret = crtc->funcs->page_flip(crtc, fb, e);
+	ret = crtc->funcs->page_flip(crtc, fb, e, page_flip->flags);
 	if (ret) {
 		if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) {
 			spin_lock_irqsave(&dev->event_lock, flags);
@@ -3905,6 +3957,7 @@
 	INIT_LIST_HEAD(&dev->mode_config.fb_list);
 	INIT_LIST_HEAD(&dev->mode_config.crtc_list);
 	INIT_LIST_HEAD(&dev->mode_config.connector_list);
+	INIT_LIST_HEAD(&dev->mode_config.bridge_list);
 	INIT_LIST_HEAD(&dev->mode_config.encoder_list);
 	INIT_LIST_HEAD(&dev->mode_config.property_list);
 	INIT_LIST_HEAD(&dev->mode_config.property_blob_list);
@@ -3941,6 +3994,7 @@
 	struct drm_connector *connector, *ot;
 	struct drm_crtc *crtc, *ct;
 	struct drm_encoder *encoder, *enct;
+	struct drm_bridge *bridge, *brt;
 	struct drm_framebuffer *fb, *fbt;
 	struct drm_property *property, *pt;
 	struct drm_property_blob *blob, *bt;
@@ -3951,6 +4005,11 @@
 		encoder->funcs->destroy(encoder);
 	}
 
+	list_for_each_entry_safe(bridge, brt,
+				 &dev->mode_config.bridge_list, head) {
+		bridge->funcs->destroy(bridge);
+	}
+
 	list_for_each_entry_safe(connector, ot,
 				 &dev->mode_config.connector_list, head) {
 		connector->funcs->destroy(connector);
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index 738a429..c722c3b 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -257,10 +257,16 @@
 {
 	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
 
+	if (encoder->bridge)
+		encoder->bridge->funcs->disable(encoder->bridge);
+
 	if (encoder_funcs->disable)
 		(*encoder_funcs->disable)(encoder);
 	else
 		(*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF);
+
+	if (encoder->bridge)
+		encoder->bridge->funcs->post_disable(encoder->bridge);
 }
 
 /**
@@ -424,6 +430,16 @@
 
 		if (encoder->crtc != crtc)
 			continue;
+
+		if (encoder->bridge && encoder->bridge->funcs->mode_fixup) {
+			ret = encoder->bridge->funcs->mode_fixup(
+					encoder->bridge, mode, adjusted_mode);
+			if (!ret) {
+				DRM_DEBUG_KMS("Bridge fixup failed\n");
+				goto done;
+			}
+		}
+
 		encoder_funcs = encoder->helper_private;
 		if (!(ret = encoder_funcs->mode_fixup(encoder, mode,
 						      adjusted_mode))) {
@@ -443,9 +459,16 @@
 
 		if (encoder->crtc != crtc)
 			continue;
+
+		if (encoder->bridge)
+			encoder->bridge->funcs->disable(encoder->bridge);
+
 		encoder_funcs = encoder->helper_private;
 		/* Disable the encoders as the first thing we do. */
 		encoder_funcs->prepare(encoder);
+
+		if (encoder->bridge)
+			encoder->bridge->funcs->post_disable(encoder->bridge);
 	}
 
 	drm_crtc_prepare_encoders(dev);
@@ -469,6 +492,10 @@
 			mode->base.id, mode->name);
 		encoder_funcs = encoder->helper_private;
 		encoder_funcs->mode_set(encoder, mode, adjusted_mode);
+
+		if (encoder->bridge && encoder->bridge->funcs->mode_set)
+			encoder->bridge->funcs->mode_set(encoder->bridge, mode,
+					adjusted_mode);
 	}
 
 	/* Now enable the clocks, plane, pipe, and connectors that we set up. */
@@ -479,9 +506,14 @@
 		if (encoder->crtc != crtc)
 			continue;
 
+		if (encoder->bridge)
+			encoder->bridge->funcs->pre_enable(encoder->bridge);
+
 		encoder_funcs = encoder->helper_private;
 		encoder_funcs->commit(encoder);
 
+		if (encoder->bridge)
+			encoder->bridge->funcs->enable(encoder->bridge);
 	}
 
 	/* Store real post-adjustment hardware mode. */
@@ -677,6 +709,11 @@
 					/* don't break so fail path works correct */
 					fail = 1;
 				break;
+
+				if (connector->dpms != DRM_MODE_DPMS_ON) {
+					DRM_DEBUG_KMS("connector dpms not on, full mode switch\n");
+					mode_changed = true;
+				}
 			}
 		}
 
@@ -754,6 +791,12 @@
 				ret = -EINVAL;
 				goto fail;
 			}
+			DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
+			for (i = 0; i < set->num_connectors; i++) {
+				DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
+					      drm_get_connector_name(set->connectors[i]));
+				set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
+			}
 		}
 		drm_helper_disable_unused_functions(dev);
 	} else if (fb_changed) {
@@ -771,22 +814,6 @@
 		}
 	}
 
-	/*
-	 * crtc set_config helpers implicit set the crtc and all connected
-	 * encoders to DPMS on for a full mode set. But for just an fb update it
-	 * doesn't do that. To not confuse userspace, do an explicit DPMS_ON
-	 * unconditionally. This will also ensure driver internal dpms state is
-	 * consistent again.
-	 */
-	if (set->crtc->enabled) {
-		DRM_DEBUG_KMS("Setting connector DPMS state to on\n");
-		for (i = 0; i < set->num_connectors; i++) {
-			DRM_DEBUG_KMS("\t[CONNECTOR:%d:%s] set DPMS on\n", set->connectors[i]->base.id,
-				      drm_get_connector_name(set->connectors[i]));
-			set->connectors[i]->funcs->dpms(set->connectors[i], DRM_MODE_DPMS_ON);
-		}
-	}
-
 	kfree(save_connectors);
 	kfree(save_encoders);
 	kfree(save_crtcs);
@@ -835,6 +862,31 @@
 	return dpms;
 }
 
+/* Helper which handles bridge ordering around encoder dpms */
+static void drm_helper_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_bridge *bridge = encoder->bridge;
+	struct drm_encoder_helper_funcs *encoder_funcs;
+
+	if (bridge) {
+		if (mode == DRM_MODE_DPMS_ON)
+			bridge->funcs->pre_enable(bridge);
+		else
+			bridge->funcs->disable(bridge);
+	}
+
+	encoder_funcs = encoder->helper_private;
+	if (encoder_funcs->dpms)
+		encoder_funcs->dpms(encoder, mode);
+
+	if (bridge) {
+		if (mode == DRM_MODE_DPMS_ON)
+			bridge->funcs->enable(bridge);
+		else
+			bridge->funcs->post_disable(bridge);
+	}
+}
+
 static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc)
 {
 	int dpms = DRM_MODE_DPMS_OFF;
@@ -862,7 +914,7 @@
 {
 	struct drm_encoder *encoder = connector->encoder;
 	struct drm_crtc *crtc = encoder ? encoder->crtc : NULL;
-	int old_dpms;
+	int old_dpms, encoder_dpms = DRM_MODE_DPMS_OFF;
 
 	if (mode == connector->dpms)
 		return;
@@ -870,6 +922,9 @@
 	old_dpms = connector->dpms;
 	connector->dpms = mode;
 
+	if (encoder)
+		encoder_dpms = drm_helper_choose_encoder_dpms(encoder);
+
 	/* from off to on, do crtc then encoder */
 	if (mode < old_dpms) {
 		if (crtc) {
@@ -878,22 +933,14 @@
 				(*crtc_funcs->dpms) (crtc,
 						     drm_helper_choose_crtc_dpms(crtc));
 		}
-		if (encoder) {
-			struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
-			if (encoder_funcs->dpms)
-				(*encoder_funcs->dpms) (encoder,
-							drm_helper_choose_encoder_dpms(encoder));
-		}
+		if (encoder)
+			drm_helper_encoder_dpms(encoder, encoder_dpms);
 	}
 
 	/* from on to off, do encoder then crtc */
 	if (mode > old_dpms) {
-		if (encoder) {
-			struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
-			if (encoder_funcs->dpms)
-				(*encoder_funcs->dpms) (encoder,
-							drm_helper_choose_encoder_dpms(encoder));
-		}
+		if (encoder)
+			drm_helper_encoder_dpms(encoder, encoder_dpms);
 		if (crtc) {
 			struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
 			if (crtc_funcs->dpms)
@@ -929,9 +976,8 @@
 {
 	struct drm_crtc *crtc;
 	struct drm_encoder *encoder;
-	struct drm_encoder_helper_funcs *encoder_funcs;
 	struct drm_crtc_helper_funcs *crtc_funcs;
-	int ret;
+	int ret, encoder_dpms;
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 
@@ -951,10 +997,10 @@
 				if(encoder->crtc != crtc)
 					continue;
 
-				encoder_funcs = encoder->helper_private;
-				if (encoder_funcs->dpms)
-					(*encoder_funcs->dpms) (encoder,
-								drm_helper_choose_encoder_dpms(encoder));
+				encoder_dpms = drm_helper_choose_encoder_dpms(
+							encoder);
+
+				drm_helper_encoder_dpms(encoder, encoder_dpms);
 			}
 
 			crtc_funcs = crtc->helper_private;
diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c
index 495b5fd..8a140a9 100644
--- a/drivers/gpu/drm/drm_dma.c
+++ b/drivers/gpu/drm/drm_dma.c
@@ -44,10 +44,18 @@
  *
  * Allocate and initialize a drm_device_dma structure.
  */
-int drm_dma_setup(struct drm_device *dev)
+int drm_legacy_dma_setup(struct drm_device *dev)
 {
 	int i;
 
+	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) ||
+	    drm_core_check_feature(dev, DRIVER_MODESET)) {
+		return 0;
+	}
+
+	dev->buf_use = 0;
+	atomic_set(&dev->buf_alloc, 0);
+
 	dev->dma = kzalloc(sizeof(*dev->dma), GFP_KERNEL);
 	if (!dev->dma)
 		return -ENOMEM;
@@ -66,11 +74,16 @@
  * Free all pages associated with DMA buffers, the buffers and pages lists, and
  * finally the drm_device::dma structure itself.
  */
-void drm_dma_takedown(struct drm_device *dev)
+void drm_legacy_dma_takedown(struct drm_device *dev)
 {
 	struct drm_device_dma *dma = dev->dma;
 	int i, j;
 
+	if (!drm_core_check_feature(dev, DRIVER_HAVE_DMA) ||
+	    drm_core_check_feature(dev, DRIVER_MODESET)) {
+		return;
+	}
+
 	if (!dma)
 		return;
 
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 99fcd7c..e572dd2 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -68,7 +68,7 @@
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_getmap, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_SET_VERSION, drm_setversion, DRM_MASTER),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_SET_UNIQUE, drm_setunique, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
@@ -87,7 +87,7 @@
 
 	DRM_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_addctx, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_MOD_CTX, drm_modctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_MOD_CTX, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_GET_CTX, drm_getctx, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_IOCTL_SWITCH_CTX, drm_switchctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_NEW_CTX, drm_newctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
@@ -106,8 +106,7 @@
 	DRM_IOCTL_DEF(DRM_IOCTL_INFO_BUFS, drm_infobufs, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_IOCTL_MAP_BUFS, drm_mapbufs, DRM_AUTH),
 	DRM_IOCTL_DEF(DRM_IOCTL_FREE_BUFS, drm_freebufs, DRM_AUTH),
-	/* The DRM_IOCTL_DMA ioctl should be defined by the driver. */
-	DRM_IOCTL_DEF(DRM_IOCTL_DMA, NULL, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_IOCTL_DMA, drm_dma_ioctl, DRM_AUTH),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_CONTROL, drm_control, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 
@@ -122,7 +121,7 @@
 	DRM_IOCTL_DEF(DRM_IOCTL_AGP_UNBIND, drm_agp_unbind_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 #endif
 
-	DRM_IOCTL_DEF(DRM_IOCTL_SG_ALLOC, drm_sg_alloc_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_SG_ALLOC, drm_sg_alloc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF(DRM_IOCTL_SG_FREE, drm_sg_free, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank, DRM_UNLOCKED),
@@ -131,14 +130,14 @@
 
 	DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 
-	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
@@ -172,6 +171,31 @@
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
 
 /**
+ * drm_legacy_dev_reinit
+ *
+ * Reinitializes a legacy/ums drm device in it's lastclose function.
+ */
+static void drm_legacy_dev_reinit(struct drm_device *dev)
+{
+	int i;
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return;
+
+	atomic_set(&dev->ioctl_count, 0);
+	atomic_set(&dev->vma_count, 0);
+
+	for (i = 0; i < ARRAY_SIZE(dev->counts); i++)
+		atomic_set(&dev->counts[i], 0);
+
+	dev->sigdata.lock = NULL;
+
+	dev->context_flag = 0;
+	dev->last_context = 0;
+	dev->if_version = 0;
+}
+
+/**
  * Take down the DRM device.
  *
  * \param dev DRM device structure.
@@ -195,32 +219,9 @@
 
 	mutex_lock(&dev->struct_mutex);
 
-	/* Clear AGP information */
-	if (drm_core_has_AGP(dev) && dev->agp &&
-			!drm_core_check_feature(dev, DRIVER_MODESET)) {
-		struct drm_agp_mem *entry, *tempe;
+	drm_agp_clear(dev);
 
-		/* Remove AGP resources, but leave dev->agp
-		   intact until drv_cleanup is called. */
-		list_for_each_entry_safe(entry, tempe, &dev->agp->memory, head) {
-			if (entry->bound)
-				drm_unbind_agp(entry->memory);
-			drm_free_agp(entry->memory, entry->pages);
-			kfree(entry);
-		}
-		INIT_LIST_HEAD(&dev->agp->memory);
-
-		if (dev->agp->acquired)
-			drm_agp_release(dev);
-
-		dev->agp->acquired = 0;
-		dev->agp->enabled = 0;
-	}
-	if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg &&
-	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
-		drm_sg_cleanup(dev->sg);
-		dev->sg = NULL;
-	}
+	drm_legacy_sg_cleanup(dev);
 
 	/* Clear vma list (only built for debugging) */
 	list_for_each_entry_safe(vma, vma_temp, &dev->vmalist, head) {
@@ -228,13 +229,13 @@
 		kfree(vma);
 	}
 
-	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA) &&
-	    !drm_core_check_feature(dev, DRIVER_MODESET))
-		drm_dma_takedown(dev);
+	drm_legacy_dma_takedown(dev);
 
 	dev->dev_mapping = NULL;
 	mutex_unlock(&dev->struct_mutex);
 
+	drm_legacy_dev_reinit(dev);
+
 	DRM_DEBUG("lastclose completed\n");
 	return 0;
 }
@@ -251,6 +252,7 @@
 	int ret = -ENOMEM;
 
 	drm_global_init();
+	drm_connector_ida_init();
 	idr_init(&drm_minors_idr);
 
 	if (register_chrdev(DRM_MAJOR, "drm", &drm_stub_fops))
@@ -263,13 +265,6 @@
 		goto err_p2;
 	}
 
-	drm_proc_root = proc_mkdir("dri", NULL);
-	if (!drm_proc_root) {
-		DRM_ERROR("Cannot create /proc/dri\n");
-		ret = -1;
-		goto err_p3;
-	}
-
 	drm_debugfs_root = debugfs_create_dir("dri", NULL);
 	if (!drm_debugfs_root) {
 		DRM_ERROR("Cannot create /sys/kernel/debug/dri\n");
@@ -292,12 +287,12 @@
 
 static void __exit drm_core_exit(void)
 {
-	remove_proc_entry("dri", NULL);
 	debugfs_remove(drm_debugfs_root);
 	drm_sysfs_destroy();
 
 	unregister_chrdev(DRM_MAJOR, "drm");
 
+	drm_connector_ida_destroy();
 	idr_destroy(&drm_minors_idr);
 }
 
@@ -420,17 +415,15 @@
 
 	/* Do not trust userspace, use our own definition */
 	func = ioctl->func;
-	/* is there a local override? */
-	if ((nr == DRM_IOCTL_NR(DRM_IOCTL_DMA)) && dev->driver->dma_ioctl)
-		func = dev->driver->dma_ioctl;
 
 	if (!func) {
 		DRM_DEBUG("no function\n");
 		retcode = -EINVAL;
 	} else if (((ioctl->flags & DRM_ROOT_ONLY) && !capable(CAP_SYS_ADMIN)) ||
-		   ((ioctl->flags & DRM_AUTH) && !file_priv->authenticated) ||
+		   ((ioctl->flags & DRM_AUTH) && !drm_is_render_client(file_priv) && !file_priv->authenticated) ||
 		   ((ioctl->flags & DRM_MASTER) && !file_priv->is_master) ||
-		   (!(ioctl->flags & DRM_CONTROL_ALLOW) && (file_priv->minor->type == DRM_MINOR_CONTROL))) {
+		   (!(ioctl->flags & DRM_CONTROL_ALLOW) && (file_priv->minor->type == DRM_MINOR_CONTROL)) ||
+		   (!(ioctl->flags & DRM_RENDER_ALLOW) && drm_is_render_client(file_priv))) {
 		retcode = -EACCES;
 	} else {
 		if (cmd & (IOC_IN | IOC_OUT)) {
@@ -485,19 +478,4 @@
 		DRM_DEBUG("ret = %d\n", retcode);
 	return retcode;
 }
-
 EXPORT_SYMBOL(drm_ioctl);
-
-struct drm_local_map *drm_getsarea(struct drm_device *dev)
-{
-	struct drm_map_list *entry;
-
-	list_for_each_entry(entry, &dev->maplist, head) {
-		if (entry->map && entry->map->type == _DRM_SHM &&
-		    (entry->map->flags & _DRM_CONTAINS_LOCK)) {
-			return entry->map;
-		}
-	}
-	return NULL;
-}
-EXPORT_SYMBOL(drm_getsarea);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 95d6f4b..1688ff5 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -125,6 +125,9 @@
 
 	/* ViewSonic VA2026w */
 	{ "VSC", 5020, EDID_QUIRK_FORCE_REDUCED_BLANKING },
+
+	/* Medion MD 30217 PG */
+	{ "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 },
 };
 
 /*
@@ -931,6 +934,36 @@
 	 .vrefresh = 100, },
 };
 
+/*
+ * HDMI 1.4 4k modes.
+ */
+static const struct drm_display_mode edid_4k_modes[] = {
+	/* 1 - 3840x2160@30Hz */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 297000,
+		   3840, 4016, 4104, 4400, 0,
+		   2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 30, },
+	/* 2 - 3840x2160@25Hz */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 297000,
+		   3840, 4896, 4984, 5280, 0,
+		   2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 25, },
+	/* 3 - 3840x2160@24Hz */
+	{ DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 297000,
+		   3840, 5116, 5204, 5500, 0,
+		   2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 24, },
+	/* 4 - 4096x2160@24Hz (SMPTE) */
+	{ DRM_MODE("4096x2160", DRM_MODE_TYPE_DRIVER, 297000,
+		   4096, 5116, 5204, 5500, 0,
+		   2160, 2168, 2178, 2250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC),
+	  .vrefresh = 24, },
+};
+
 /*** DDC fetch and block validation ***/
 
 static const u8 edid_header[] = {
@@ -2287,7 +2320,6 @@
 	return closure.modes;
 }
 
-#define HDMI_IDENTIFIER 0x000C03
 #define AUDIO_BLOCK	0x01
 #define VIDEO_BLOCK     0x02
 #define VENDOR_BLOCK    0x03
@@ -2298,10 +2330,10 @@
 #define EDID_CEA_YCRCB422	(1 << 4)
 #define EDID_CEA_VCDB_QS	(1 << 6)
 
-/**
+/*
  * Search EDID for CEA extension block.
  */
-u8 *drm_find_cea_extension(struct edid *edid)
+static u8 *drm_find_cea_extension(struct edid *edid)
 {
 	u8 *edid_ext = NULL;
 	int i;
@@ -2322,7 +2354,6 @@
 
 	return edid_ext;
 }
-EXPORT_SYMBOL(drm_find_cea_extension);
 
 /*
  * Calculate the alternate clock for the CEA mode
@@ -2380,6 +2411,54 @@
 }
 EXPORT_SYMBOL(drm_match_cea_mode);
 
+/*
+ * Calculate the alternate clock for HDMI modes (those from the HDMI vendor
+ * specific block).
+ *
+ * It's almost like cea_mode_alternate_clock(), we just need to add an
+ * exception for the VIC 4 mode (4096x2160@24Hz): no alternate clock for this
+ * one.
+ */
+static unsigned int
+hdmi_mode_alternate_clock(const struct drm_display_mode *hdmi_mode)
+{
+	if (hdmi_mode->vdisplay == 4096 && hdmi_mode->hdisplay == 2160)
+		return hdmi_mode->clock;
+
+	return cea_mode_alternate_clock(hdmi_mode);
+}
+
+/*
+ * drm_match_hdmi_mode - look for a HDMI mode matching given mode
+ * @to_match: display mode
+ *
+ * An HDMI mode is one defined in the HDMI vendor specific block.
+ *
+ * Returns the HDMI Video ID (VIC) of the mode or 0 if it isn't one.
+ */
+static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match)
+{
+	u8 mode;
+
+	if (!to_match->clock)
+		return 0;
+
+	for (mode = 0; mode < ARRAY_SIZE(edid_4k_modes); mode++) {
+		const struct drm_display_mode *hdmi_mode = &edid_4k_modes[mode];
+		unsigned int clock1, clock2;
+
+		/* Make sure to also match alternate clocks */
+		clock1 = hdmi_mode->clock;
+		clock2 = hdmi_mode_alternate_clock(hdmi_mode);
+
+		if ((KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock1) ||
+		     KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock2)) &&
+		    drm_mode_equal_no_clocks(to_match, hdmi_mode))
+			return mode + 1;
+	}
+	return 0;
+}
+
 static int
 add_alternate_cea_modes(struct drm_connector *connector, struct edid *edid)
 {
@@ -2397,18 +2476,26 @@
 	 * with the alternate clock for certain CEA modes.
 	 */
 	list_for_each_entry(mode, &connector->probed_modes, head) {
-		const struct drm_display_mode *cea_mode;
+		const struct drm_display_mode *cea_mode = NULL;
 		struct drm_display_mode *newmode;
-		u8 cea_mode_idx = drm_match_cea_mode(mode) - 1;
+		u8 mode_idx = drm_match_cea_mode(mode) - 1;
 		unsigned int clock1, clock2;
 
-		if (cea_mode_idx >= ARRAY_SIZE(edid_cea_modes))
+		if (mode_idx < ARRAY_SIZE(edid_cea_modes)) {
+			cea_mode = &edid_cea_modes[mode_idx];
+			clock2 = cea_mode_alternate_clock(cea_mode);
+		} else {
+			mode_idx = drm_match_hdmi_mode(mode) - 1;
+			if (mode_idx < ARRAY_SIZE(edid_4k_modes)) {
+				cea_mode = &edid_4k_modes[mode_idx];
+				clock2 = hdmi_mode_alternate_clock(cea_mode);
+			}
+		}
+
+		if (!cea_mode)
 			continue;
 
-		cea_mode = &edid_cea_modes[cea_mode_idx];
-
 		clock1 = cea_mode->clock;
-		clock2 = cea_mode_alternate_clock(cea_mode);
 
 		if (clock1 == clock2)
 			continue;
@@ -2442,10 +2529,11 @@
 }
 
 static int
-do_cea_modes (struct drm_connector *connector, u8 *db, u8 len)
+do_cea_modes(struct drm_connector *connector, const u8 *db, u8 len)
 {
 	struct drm_device *dev = connector->dev;
-	u8 * mode, cea_mode;
+	const u8 *mode;
+	u8 cea_mode;
 	int modes = 0;
 
 	for (mode = db; mode < db + len; mode++) {
@@ -2465,6 +2553,68 @@
 	return modes;
 }
 
+/*
+ * do_hdmi_vsdb_modes - Parse the HDMI Vendor Specific data block
+ * @connector: connector corresponding to the HDMI sink
+ * @db: start of the CEA vendor specific block
+ * @len: length of the CEA block payload, ie. one can access up to db[len]
+ *
+ * Parses the HDMI VSDB looking for modes to add to @connector.
+ */
+static int
+do_hdmi_vsdb_modes(struct drm_connector *connector, const u8 *db, u8 len)
+{
+	struct drm_device *dev = connector->dev;
+	int modes = 0, offset = 0, i;
+	u8 vic_len;
+
+	if (len < 8)
+		goto out;
+
+	/* no HDMI_Video_Present */
+	if (!(db[8] & (1 << 5)))
+		goto out;
+
+	/* Latency_Fields_Present */
+	if (db[8] & (1 << 7))
+		offset += 2;
+
+	/* I_Latency_Fields_Present */
+	if (db[8] & (1 << 6))
+		offset += 2;
+
+	/* the declared length is not long enough for the 2 first bytes
+	 * of additional video format capabilities */
+	offset += 2;
+	if (len < (8 + offset))
+		goto out;
+
+	vic_len = db[8 + offset] >> 5;
+
+	for (i = 0; i < vic_len && len >= (9 + offset + i); i++) {
+		struct drm_display_mode *newmode;
+		u8 vic;
+
+		vic = db[9 + offset + i];
+
+		vic--; /* VICs start at 1 */
+		if (vic >= ARRAY_SIZE(edid_4k_modes)) {
+			DRM_ERROR("Unknown HDMI VIC: %d\n", vic);
+			continue;
+		}
+
+		newmode = drm_mode_duplicate(dev, &edid_4k_modes[vic]);
+		if (!newmode)
+			continue;
+
+		drm_mode_probed_add(connector, newmode);
+		modes++;
+	}
+
+out:
+	return modes;
+}
+
 static int
 cea_db_payload_len(const u8 *db)
 {
@@ -2496,14 +2646,30 @@
 	return 0;
 }
 
+static bool cea_db_is_hdmi_vsdb(const u8 *db)
+{
+	int hdmi_id;
+
+	if (cea_db_tag(db) != VENDOR_BLOCK)
+		return false;
+
+	if (cea_db_payload_len(db) < 5)
+		return false;
+
+	hdmi_id = db[1] | (db[2] << 8) | (db[3] << 16);
+
+	return hdmi_id == HDMI_IEEE_OUI;
+}
+
 #define for_each_cea_db(cea, i, start, end) \
 	for ((i) = (start); (i) < (end) && (i) + cea_db_payload_len(&(cea)[(i)]) < (end); (i) += cea_db_payload_len(&(cea)[(i)]) + 1)
 
 static int
 add_cea_modes(struct drm_connector *connector, struct edid *edid)
 {
-	u8 * cea = drm_find_cea_extension(edid);
-	u8 * db, dbl;
+	const u8 *cea = drm_find_cea_extension(edid);
+	const u8 *db;
+	u8 dbl;
 	int modes = 0;
 
 	if (cea && cea_revision(cea) >= 3) {
@@ -2517,7 +2683,9 @@
 			dbl = cea_db_payload_len(db);
 
 			if (cea_db_tag(db) == VIDEO_BLOCK)
-				modes += do_cea_modes (connector, db+1, dbl);
+				modes += do_cea_modes(connector, db + 1, dbl);
+			else if (cea_db_is_hdmi_vsdb(db))
+				modes += do_hdmi_vsdb_modes(connector, db, dbl);
 		}
 	}
 
@@ -2570,21 +2738,6 @@
 		*(u8 **)data = t->data.other_data.data.str.str;
 }
 
-static bool cea_db_is_hdmi_vsdb(const u8 *db)
-{
-	int hdmi_id;
-
-	if (cea_db_tag(db) != VENDOR_BLOCK)
-		return false;
-
-	if (cea_db_payload_len(db) < 5)
-		return false;
-
-	hdmi_id = db[1] | (db[2] << 8) | (db[3] << 16);
-
-	return hdmi_id == HDMI_IDENTIFIER;
-}
-
 /**
  * drm_edid_to_eld - build ELD from EDID
  * @connector: connector corresponding to the HDMI/DP sink
@@ -2732,6 +2885,58 @@
 EXPORT_SYMBOL(drm_edid_to_sad);
 
 /**
+ * drm_edid_to_speaker_allocation - extracts Speaker Allocation Data Blocks from EDID
+ * @edid: EDID to parse
+ * @sadb: pointer to the speaker block
+ *
+ * Looks for CEA EDID block and extracts the Speaker Allocation Data Block from it.
+ * Note: returned pointer needs to be kfreed
+ *
+ * Return number of found Speaker Allocation Blocks or negative number on error.
+ */
+int drm_edid_to_speaker_allocation(struct edid *edid, u8 **sadb)
+{
+	int count = 0;
+	int i, start, end, dbl;
+	const u8 *cea;
+
+	cea = drm_find_cea_extension(edid);
+	if (!cea) {
+		DRM_DEBUG_KMS("SAD: no CEA Extension found\n");
+		return -ENOENT;
+	}
+
+	if (cea_revision(cea) < 3) {
+		DRM_DEBUG_KMS("SAD: wrong CEA revision\n");
+		return -ENOTSUPP;
+	}
+
+	if (cea_db_offsets(cea, &start, &end)) {
+		DRM_DEBUG_KMS("SAD: invalid data block offsets\n");
+		return -EPROTO;
+	}
+
+	for_each_cea_db(cea, i, start, end) {
+		const u8 *db = &cea[i];
+
+		if (cea_db_tag(db) == SPEAKER_BLOCK) {
+			dbl = cea_db_payload_len(db);
+
+			/* Speaker Allocation Data Block */
+			if (dbl == 3) {
+				*sadb = kmalloc(dbl, GFP_KERNEL);
+				memcpy(*sadb, &db[1], dbl);
+				count = dbl;
+				break;
+			}
+		}
+	}
+
+	return count;
+}
+EXPORT_SYMBOL(drm_edid_to_speaker_allocation);
+
+/**
  * drm_av_sync_delay - HDMI/DP sink audio-video sync delay in millisecond
  * @connector: connector associated with the HDMI/DP sink
  * @mode: the display mode
@@ -3102,9 +3307,10 @@
 	if (err < 0)
 		return err;
 
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+		frame->pixel_repeat = 1;
+
 	frame->video_code = drm_match_cea_mode(mode);
-	if (!frame->video_code)
-		return 0;
 
 	frame->picture_aspect = HDMI_PICTURE_ASPECT_NONE;
 	frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
@@ -3112,3 +3318,39 @@
 	return 0;
 }
 EXPORT_SYMBOL(drm_hdmi_avi_infoframe_from_display_mode);
+
+/**
+ * drm_hdmi_vendor_infoframe_from_display_mode() - fill an HDMI infoframe with
+ * data from a DRM display mode
+ * @frame: HDMI vendor infoframe
+ * @mode: DRM display mode
+ *
+ * Note that there's is a need to send HDMI vendor infoframes only when using a
+ * 4k or stereoscopic 3D mode. So when giving any other mode as input this
+ * function will return -EINVAL, error that can be safely ignored.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int
+drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
+					    const struct drm_display_mode *mode)
+{
+	int err;
+	u8 vic;
+
+	if (!frame || !mode)
+		return -EINVAL;
+
+	vic = drm_match_hdmi_mode(mode);
+	if (!vic)
+		return -EINVAL;
+
+	err = hdmi_vendor_infoframe_init(frame);
+	if (err < 0)
+		return err;
+
+	frame->vic = vic;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_hdmi_vendor_infoframe_from_display_mode);
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index c385cc5..61b5a47 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -181,11 +181,11 @@
 EXPORT_SYMBOL_GPL(drm_fb_cma_get_gem_obj);
 
 #ifdef CONFIG_DEBUG_FS
-/**
+/*
  * drm_fb_cma_describe() - Helper to dump information about a single
  * CMA framebuffer object
  */
-void drm_fb_cma_describe(struct drm_framebuffer *fb, struct seq_file *m)
+static void drm_fb_cma_describe(struct drm_framebuffer *fb, struct seq_file *m)
 {
 	struct drm_fb_cma *fb_cma = to_fb_cma(fb);
 	int i, n = drm_format_num_planes(fb->pixel_format);
@@ -199,7 +199,6 @@
 		drm_gem_cma_describe(fb_cma->obj[i], m);
 	}
 }
-EXPORT_SYMBOL_GPL(drm_fb_cma_describe);
 
 /**
  * drm_fb_cma_debugfs_show() - Helper to list CMA framebuffer objects
diff --git a/drivers/gpu/drm/drm_flip_work.c b/drivers/gpu/drm/drm_flip_work.c
new file mode 100644
index 0000000..e788882
--- /dev/null
+++ b/drivers/gpu/drm/drm_flip_work.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_flip_work.h"
+
+/**
+ * drm_flip_work_queue - queue work
+ * @work: the flip-work
+ * @val: the value to queue
+ *
+ * Queues work, that will later be run (passed back to drm_flip_func_t
+ * func) on a work queue after drm_flip_work_commit() is called.
+ */
+void drm_flip_work_queue(struct drm_flip_work *work, void *val)
+{
+	if (kfifo_put(&work->fifo, (const void **)&val)) {
+		atomic_inc(&work->pending);
+	} else {
+		DRM_ERROR("%s fifo full!\n", work->name);
+		work->func(work, val);
+	}
+}
+EXPORT_SYMBOL(drm_flip_work_queue);
+
+/**
+ * drm_flip_work_commit - commit queued work
+ * @work: the flip-work
+ * @wq: the work-queue to run the queued work on
+ *
+ * Trigger work previously queued by drm_flip_work_queue() to run
+ * on a workqueue.  The typical usage would be to queue work (via
+ * drm_flip_work_queue()) at any point (from vblank irq and/or
+ * prior), and then from vblank irq commit the queued work.
+ */
+void drm_flip_work_commit(struct drm_flip_work *work,
+		struct workqueue_struct *wq)
+{
+	uint32_t pending = atomic_read(&work->pending);
+	atomic_add(pending, &work->count);
+	atomic_sub(pending, &work->pending);
+	queue_work(wq, &work->worker);
+}
+EXPORT_SYMBOL(drm_flip_work_commit);
+
+static void flip_worker(struct work_struct *w)
+{
+	struct drm_flip_work *work = container_of(w, struct drm_flip_work, worker);
+	uint32_t count = atomic_read(&work->count);
+	void *val = NULL;
+
+	atomic_sub(count, &work->count);
+
+	while(count--)
+		if (!WARN_ON(!kfifo_get(&work->fifo, &val)))
+			work->func(work, val);
+}
+
+/**
+ * drm_flip_work_init - initialize flip-work
+ * @work: the flip-work to initialize
+ * @size: the max queue depth
+ * @name: debug name
+ * @func: the callback work function
+ *
+ * Initializes/allocates resources for the flip-work
+ *
+ * RETURNS:
+ * Zero on success, error code on failure.
+ */
+int drm_flip_work_init(struct drm_flip_work *work, int size,
+		const char *name, drm_flip_func_t func)
+{
+	int ret;
+
+	work->name = name;
+	atomic_set(&work->count, 0);
+	atomic_set(&work->pending, 0);
+	work->func = func;
+
+	ret = kfifo_alloc(&work->fifo, size, GFP_KERNEL);
+	if (ret) {
+		DRM_ERROR("could not allocate %s fifo\n", name);
+		return ret;
+	}
+
+	INIT_WORK(&work->worker, flip_worker);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_flip_work_init);
+
+/**
+ * drm_flip_work_cleanup - cleans up flip-work
+ * @work: the flip-work to cleanup
+ *
+ * Destroy resources allocated for the flip-work
+ */
+void drm_flip_work_cleanup(struct drm_flip_work *work)
+{
+	WARN_ON(!kfifo_is_empty(&work->fifo));
+	kfifo_free(&work->fifo);
+}
+EXPORT_SYMBOL(drm_flip_work_cleanup);
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 3a24385..4be8e09 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -48,59 +48,21 @@
 
 static int drm_setup(struct drm_device * dev)
 {
-	int i;
 	int ret;
 
-	if (dev->driver->firstopen) {
+	if (dev->driver->firstopen &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
 		ret = dev->driver->firstopen(dev);
 		if (ret != 0)
 			return ret;
 	}
 
-	atomic_set(&dev->ioctl_count, 0);
-	atomic_set(&dev->vma_count, 0);
+	ret = drm_legacy_dma_setup(dev);
+	if (ret < 0)
+		return ret;
 
-	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA) &&
-	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
-		dev->buf_use = 0;
-		atomic_set(&dev->buf_alloc, 0);
-
-		i = drm_dma_setup(dev);
-		if (i < 0)
-			return i;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(dev->counts); i++)
-		atomic_set(&dev->counts[i], 0);
-
-	dev->sigdata.lock = NULL;
-
-	dev->context_flag = 0;
-	dev->interrupt_flag = 0;
-	dev->dma_flag = 0;
-	dev->last_context = 0;
-	dev->last_switch = 0;
-	dev->last_checked = 0;
-	init_waitqueue_head(&dev->context_wait);
-	dev->if_version = 0;
-
-	dev->ctx_start = 0;
-	dev->lck_start = 0;
-
-	dev->buf_async = NULL;
-	init_waitqueue_head(&dev->buf_readers);
-	init_waitqueue_head(&dev->buf_writers);
 
 	DRM_DEBUG("\n");
-
-	/*
-	 * The kernel's context could be created here, but is now created
-	 * in drm_dma_enqueue.  This is more resource-efficient for
-	 * hardware that does not do DMA, but may mean that
-	 * drm_select_queue fails between the time the interrupt is
-	 * initialized and the time the queues are initialized.
-	 */
-
 	return 0;
 }
 
@@ -257,7 +219,7 @@
 		return -EBUSY;	/* No exclusive opens */
 	if (!drm_cpu_valid())
 		return -EINVAL;
-	if (dev->switch_power_state != DRM_SWITCH_POWER_ON)
+	if (dev->switch_power_state != DRM_SWITCH_POWER_ON && dev->switch_power_state != DRM_SWITCH_POWER_DYNAMIC_OFF)
 		return -EINVAL;
 
 	DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor_id);
@@ -300,10 +262,10 @@
 			goto out_prime_destroy;
 	}
 
-
-	/* if there is no current master make this fd it */
+	/* if there is no current master make this fd it, but do not create
+	 * any master object for render clients */
 	mutex_lock(&dev->struct_mutex);
-	if (!priv->minor->master) {
+	if (!priv->minor->master && !drm_is_render_client(priv)) {
 		/* create a new master */
 		priv->minor->master = drm_master_create(priv->minor);
 		if (!priv->minor->master) {
@@ -341,12 +303,11 @@
 				goto out_close;
 			}
 		}
-		mutex_unlock(&dev->struct_mutex);
-	} else {
+	} else if (!drm_is_render_client(priv)) {
 		/* get a reference to the master */
 		priv->master = drm_master_get(priv->minor->master);
-		mutex_unlock(&dev->struct_mutex);
 	}
+	mutex_unlock(&dev->struct_mutex);
 
 	mutex_lock(&dev->struct_mutex);
 	list_add(&priv->lhead, &dev->filelist);
@@ -388,18 +349,6 @@
 	return ret;
 }
 
-/** No-op. */
-int drm_fasync(int fd, struct file *filp, int on)
-{
-	struct drm_file *priv = filp->private_data;
-	struct drm_device *dev = priv->minor->dev;
-
-	DRM_DEBUG("fd = %d, device = 0x%lx\n", fd,
-		  (long)old_encode_dev(priv->minor->device));
-	return fasync_helper(fd, filp, on, &dev->buf_async);
-}
-EXPORT_SYMBOL(drm_fasync);
-
 static void drm_master_release(struct drm_device *dev, struct file *filp)
 {
 	struct drm_file *file_priv = filp->private_data;
@@ -490,26 +439,7 @@
 	if (dev->driver->driver_features & DRIVER_GEM)
 		drm_gem_release(dev, file_priv);
 
-	mutex_lock(&dev->ctxlist_mutex);
-	if (!list_empty(&dev->ctxlist)) {
-		struct drm_ctx_list *pos, *n;
-
-		list_for_each_entry_safe(pos, n, &dev->ctxlist, head) {
-			if (pos->tag == file_priv &&
-			    pos->handle != DRM_KERNEL_CONTEXT) {
-				if (dev->driver->context_dtor)
-					dev->driver->context_dtor(dev,
-								  pos->handle);
-
-				drm_ctxbitmap_free(dev, pos->handle);
-
-				list_del(&pos->head);
-				kfree(pos);
-				--dev->ctx_count;
-			}
-		}
-	}
-	mutex_unlock(&dev->ctxlist_mutex);
+	drm_legacy_ctxbitmap_release(dev, file_priv);
 
 	mutex_lock(&dev->struct_mutex);
 
@@ -547,7 +477,8 @@
 	iput(container_of(dev->dev_mapping, struct inode, i_data));
 
 	/* drop the reference held my the file priv */
-	drm_master_put(&file_priv->master);
+	if (file_priv->master)
+		drm_master_put(&file_priv->master);
 	file_priv->is_master = 0;
 	list_del(&file_priv->lhead);
 	mutex_unlock(&dev->struct_mutex);
@@ -555,6 +486,7 @@
 	if (dev->driver->postclose)
 		dev->driver->postclose(dev, file_priv);
 
+
 	if (drm_core_check_feature(dev, DRIVER_PRIME))
 		drm_prime_destroy_file_private(&file_priv->prime);
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 603f256..49293bdc 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -37,6 +37,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/dma-buf.h>
 #include <drm/drmP.h>
+#include <drm/drm_vma_manager.h>
 
 /** @file drm_gem.c
  *
@@ -92,7 +93,7 @@
 {
 	struct drm_gem_mm *mm;
 
-	spin_lock_init(&dev->object_name_lock);
+	mutex_init(&dev->object_name_lock);
 	idr_init(&dev->object_name_idr);
 
 	mm = kzalloc(sizeof(struct drm_gem_mm), GFP_KERNEL);
@@ -102,14 +103,9 @@
 	}
 
 	dev->mm_private = mm;
-
-	if (drm_ht_create(&mm->offset_hash, 12)) {
-		kfree(mm);
-		return -ENOMEM;
-	}
-
-	drm_mm_init(&mm->offset_manager, DRM_FILE_PAGE_OFFSET_START,
-		    DRM_FILE_PAGE_OFFSET_SIZE);
+	drm_vma_offset_manager_init(&mm->vma_manager,
+				    DRM_FILE_PAGE_OFFSET_START,
+				    DRM_FILE_PAGE_OFFSET_SIZE);
 
 	return 0;
 }
@@ -119,8 +115,7 @@
 {
 	struct drm_gem_mm *mm = dev->mm_private;
 
-	drm_mm_takedown(&mm->offset_manager);
-	drm_ht_remove(&mm->offset_hash);
+	drm_vma_offset_manager_destroy(&mm->vma_manager);
 	kfree(mm);
 	dev->mm_private = NULL;
 }
@@ -132,16 +127,14 @@
 int drm_gem_object_init(struct drm_device *dev,
 			struct drm_gem_object *obj, size_t size)
 {
-	BUG_ON((size & (PAGE_SIZE - 1)) != 0);
+	struct file *filp;
 
-	obj->dev = dev;
-	obj->filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
-	if (IS_ERR(obj->filp))
-		return PTR_ERR(obj->filp);
+	filp = shmem_file_setup("drm mm object", size, VM_NORESERVE);
+	if (IS_ERR(filp))
+		return PTR_ERR(filp);
 
-	kref_init(&obj->refcount);
-	atomic_set(&obj->handle_count, 0);
-	obj->size = size;
+	drm_gem_private_object_init(dev, obj, size);
+	obj->filp = filp;
 
 	return 0;
 }
@@ -152,8 +145,8 @@
  * no GEM provided backing store. Instead the caller is responsible for
  * backing the object and handling it.
  */
-int drm_gem_private_object_init(struct drm_device *dev,
-			struct drm_gem_object *obj, size_t size)
+void drm_gem_private_object_init(struct drm_device *dev,
+				 struct drm_gem_object *obj, size_t size)
 {
 	BUG_ON((size & (PAGE_SIZE - 1)) != 0);
 
@@ -161,10 +154,9 @@
 	obj->filp = NULL;
 
 	kref_init(&obj->refcount);
-	atomic_set(&obj->handle_count, 0);
+	obj->handle_count = 0;
 	obj->size = size;
-
-	return 0;
+	drm_vma_node_reset(&obj->vma_node);
 }
 EXPORT_SYMBOL(drm_gem_private_object_init);
 
@@ -200,16 +192,79 @@
 static void
 drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp)
 {
-	if (obj->import_attach) {
-		drm_prime_remove_buf_handle(&filp->prime,
-				obj->import_attach->dmabuf);
+	/*
+	 * Note: obj->dma_buf can't disappear as long as we still hold a
+	 * handle reference in obj->handle_count.
+	 */
+	mutex_lock(&filp->prime.lock);
+	if (obj->dma_buf) {
+		drm_prime_remove_buf_handle_locked(&filp->prime,
+						   obj->dma_buf);
 	}
-	if (obj->export_dma_buf) {
-		drm_prime_remove_buf_handle(&filp->prime,
-				obj->export_dma_buf);
+	mutex_unlock(&filp->prime.lock);
+}
+
+static void drm_gem_object_ref_bug(struct kref *list_kref)
+{
+	BUG();
+}
+
+/**
+ * Called after the last handle to the object has been closed
+ *
+ * Removes any name for the object. Note that this must be
+ * called before drm_gem_object_free or we'll be touching
+ * freed memory
+ */
+static void drm_gem_object_handle_free(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+
+	/* Remove any name for this object */
+	if (obj->name) {
+		idr_remove(&dev->object_name_idr, obj->name);
+		obj->name = 0;
+		/*
+		 * The object name held a reference to this object, drop
+		 * that now.
+		*
+		* This cannot be the last reference, since the handle holds one too.
+		 */
+		kref_put(&obj->refcount, drm_gem_object_ref_bug);
 	}
 }
 
+static void drm_gem_object_exported_dma_buf_free(struct drm_gem_object *obj)
+{
+	/* Unbreak the reference cycle if we have an exported dma_buf. */
+	if (obj->dma_buf) {
+		dma_buf_put(obj->dma_buf);
+		obj->dma_buf = NULL;
+	}
+}
+
+static void
+drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj)
+{
+	if (WARN_ON(obj->handle_count == 0))
+		return;
+
+	/*
+	* Must bump handle count first as this may be the last
+	* ref, in which case the object would disappear before we
+	* checked for a name
+	*/
+
+	mutex_lock(&obj->dev->object_name_lock);
+	if (--obj->handle_count == 0) {
+		drm_gem_object_handle_free(obj);
+		drm_gem_object_exported_dma_buf_free(obj);
+	}
+	mutex_unlock(&obj->dev->object_name_lock);
+
+	drm_gem_object_unreference_unlocked(obj);
+}
+
 /**
  * Removes the mapping from handle to filp for this object.
  */
@@ -242,7 +297,9 @@
 	idr_remove(&filp->object_idr, handle);
 	spin_unlock(&filp->table_lock);
 
-	drm_gem_remove_prime_handles(obj, filp);
+	if (drm_core_check_feature(dev, DRIVER_PRIME))
+		drm_gem_remove_prime_handles(obj, filp);
+	drm_vma_node_revoke(&obj->vma_node, filp->filp);
 
 	if (dev->driver->gem_close_object)
 		dev->driver->gem_close_object(obj, filp);
@@ -253,6 +310,73 @@
 EXPORT_SYMBOL(drm_gem_handle_delete);
 
 /**
+ * drm_gem_dumb_destroy - dumb fb callback helper for gem based drivers
+ * 
+ * This implements the ->dumb_destroy kms driver callback for drivers which use
+ * gem to manage their backing storage.
+ */
+int drm_gem_dumb_destroy(struct drm_file *file,
+			 struct drm_device *dev,
+			 uint32_t handle)
+{
+	return drm_gem_handle_delete(file, handle);
+}
+EXPORT_SYMBOL(drm_gem_dumb_destroy);
+
+/**
+ * drm_gem_handle_create_tail - internal functions to create a handle
+ * 
+ * This expects the dev->object_name_lock to be held already and will drop it
+ * before returning. Used to avoid races in establishing new handles when
+ * importing an object from either an flink name or a dma-buf.
+ */
+int
+drm_gem_handle_create_tail(struct drm_file *file_priv,
+			   struct drm_gem_object *obj,
+			   u32 *handlep)
+{
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	WARN_ON(!mutex_is_locked(&dev->object_name_lock));
+
+	/*
+	 * Get the user-visible handle using idr.  Preload and perform
+	 * allocation under our spinlock.
+	 */
+	idr_preload(GFP_KERNEL);
+	spin_lock(&file_priv->table_lock);
+
+	ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
+	drm_gem_object_reference(obj);
+	obj->handle_count++;
+	spin_unlock(&file_priv->table_lock);
+	idr_preload_end();
+	mutex_unlock(&dev->object_name_lock);
+	if (ret < 0) {
+		drm_gem_object_handle_unreference_unlocked(obj);
+		return ret;
+	}
+	*handlep = ret;
+
+	ret = drm_vma_node_allow(&obj->vma_node, file_priv->filp);
+	if (ret) {
+		drm_gem_handle_delete(file_priv, *handlep);
+		return ret;
+	}
+
+	if (dev->driver->gem_open_object) {
+		ret = dev->driver->gem_open_object(obj, file_priv);
+		if (ret) {
+			drm_gem_handle_delete(file_priv, *handlep);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
  * Create a handle for this object. This adds a handle reference
  * to the object, which includes a regular reference count. Callers
  * will likely want to dereference the object afterwards.
@@ -262,35 +386,9 @@
 		       struct drm_gem_object *obj,
 		       u32 *handlep)
 {
-	struct drm_device *dev = obj->dev;
-	int ret;
+	mutex_lock(&obj->dev->object_name_lock);
 
-	/*
-	 * Get the user-visible handle using idr.  Preload and perform
-	 * allocation under our spinlock.
-	 */
-	idr_preload(GFP_KERNEL);
-	spin_lock(&file_priv->table_lock);
-
-	ret = idr_alloc(&file_priv->object_idr, obj, 1, 0, GFP_NOWAIT);
-
-	spin_unlock(&file_priv->table_lock);
-	idr_preload_end();
-	if (ret < 0)
-		return ret;
-	*handlep = ret;
-
-	drm_gem_object_handle_reference(obj);
-
-	if (dev->driver->gem_open_object) {
-		ret = dev->driver->gem_open_object(obj, file_priv);
-		if (ret) {
-			drm_gem_handle_delete(file_priv, *handlep);
-			return ret;
-		}
-	}
-
-	return 0;
+	return drm_gem_handle_create_tail(file_priv, obj, handlep);
 }
 EXPORT_SYMBOL(drm_gem_handle_create);
 
@@ -306,16 +404,37 @@
 {
 	struct drm_device *dev = obj->dev;
 	struct drm_gem_mm *mm = dev->mm_private;
-	struct drm_map_list *list = &obj->map_list;
 
-	drm_ht_remove_item(&mm->offset_hash, &list->hash);
-	drm_mm_put_block(list->file_offset_node);
-	kfree(list->map);
-	list->map = NULL;
+	drm_vma_offset_remove(&mm->vma_manager, &obj->vma_node);
 }
 EXPORT_SYMBOL(drm_gem_free_mmap_offset);
 
 /**
+ * drm_gem_create_mmap_offset_size - create a fake mmap offset for an object
+ * @obj: obj in question
+ * @size: the virtual size
+ *
+ * GEM memory mapping works by handing back to userspace a fake mmap offset
+ * it can use in a subsequent mmap(2) call.  The DRM core code then looks
+ * up the object based on the offset and sets up the various memory mapping
+ * structures.
+ *
+ * This routine allocates and attaches a fake offset for @obj, in cases where
+ * the virtual size differs from the physical size (ie. obj->size).  Otherwise
+ * just use drm_gem_create_mmap_offset().
+ */
+int
+drm_gem_create_mmap_offset_size(struct drm_gem_object *obj, size_t size)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+
+	return drm_vma_offset_add(&mm->vma_manager, &obj->vma_node,
+				  size / PAGE_SIZE);
+}
+EXPORT_SYMBOL(drm_gem_create_mmap_offset_size);
+
+/**
  * drm_gem_create_mmap_offset - create a fake mmap offset for an object
  * @obj: obj in question
  *
@@ -326,62 +445,115 @@
  *
  * This routine allocates and attaches a fake offset for @obj.
  */
-int
-drm_gem_create_mmap_offset(struct drm_gem_object *obj)
+int drm_gem_create_mmap_offset(struct drm_gem_object *obj)
 {
-	struct drm_device *dev = obj->dev;
-	struct drm_gem_mm *mm = dev->mm_private;
-	struct drm_map_list *list;
-	struct drm_local_map *map;
-	int ret;
-
-	/* Set the object up for mmap'ing */
-	list = &obj->map_list;
-	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
-	if (!list->map)
-		return -ENOMEM;
-
-	map = list->map;
-	map->type = _DRM_GEM;
-	map->size = obj->size;
-	map->handle = obj;
-
-	/* Get a DRM GEM mmap offset allocated... */
-	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
-			obj->size / PAGE_SIZE, 0, false);
-
-	if (!list->file_offset_node) {
-		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
-		ret = -ENOSPC;
-		goto out_free_list;
-	}
-
-	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
-			obj->size / PAGE_SIZE, 0);
-	if (!list->file_offset_node) {
-		ret = -ENOMEM;
-		goto out_free_list;
-	}
-
-	list->hash.key = list->file_offset_node->start;
-	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
-	if (ret) {
-		DRM_ERROR("failed to add to map hash\n");
-		goto out_free_mm;
-	}
-
-	return 0;
-
-out_free_mm:
-	drm_mm_put_block(list->file_offset_node);
-out_free_list:
-	kfree(list->map);
-	list->map = NULL;
-
-	return ret;
+	return drm_gem_create_mmap_offset_size(obj, obj->size);
 }
 EXPORT_SYMBOL(drm_gem_create_mmap_offset);
 
+/**
+ * drm_gem_get_pages - helper to allocate backing pages for a GEM object
+ * from shmem
+ * @obj: obj in question
+ * @gfpmask: gfp mask of requested pages
+ */
+struct page **drm_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask)
+{
+	struct inode *inode;
+	struct address_space *mapping;
+	struct page *p, **pages;
+	int i, npages;
+
+	/* This is the shared memory object that backs the GEM resource */
+	inode = file_inode(obj->filp);
+	mapping = inode->i_mapping;
+
+	/* We already BUG_ON() for non-page-aligned sizes in
+	 * drm_gem_object_init(), so we should never hit this unless
+	 * driver author is doing something really wrong:
+	 */
+	WARN_ON((obj->size & (PAGE_SIZE - 1)) != 0);
+
+	npages = obj->size >> PAGE_SHIFT;
+
+	pages = drm_malloc_ab(npages, sizeof(struct page *));
+	if (pages == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	gfpmask |= mapping_gfp_mask(mapping);
+
+	for (i = 0; i < npages; i++) {
+		p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
+		if (IS_ERR(p))
+			goto fail;
+		pages[i] = p;
+
+		/* There is a hypothetical issue w/ drivers that require
+		 * buffer memory in the low 4GB.. if the pages are un-
+		 * pinned, and swapped out, they can end up swapped back
+		 * in above 4GB.  If pages are already in memory, then
+		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
+		 * even if the already in-memory page disobeys the mask.
+		 *
+		 * It is only a theoretical issue today, because none of
+		 * the devices with this limitation can be populated with
+		 * enough memory to trigger the issue.  But this BUG_ON()
+		 * is here as a reminder in case the problem with
+		 * shmem_read_mapping_page_gfp() isn't solved by the time
+		 * it does become a real issue.
+		 *
+		 * See this thread: http://lkml.org/lkml/2011/7/11/238
+		 */
+		BUG_ON((gfpmask & __GFP_DMA32) &&
+				(page_to_pfn(p) >= 0x00100000UL));
+	}
+
+	return pages;
+
+fail:
+	while (i--)
+		page_cache_release(pages[i]);
+
+	drm_free_large(pages);
+	return ERR_CAST(p);
+}
+EXPORT_SYMBOL(drm_gem_get_pages);
+
+/**
+ * drm_gem_put_pages - helper to free backing pages for a GEM object
+ * @obj: obj in question
+ * @pages: pages to free
+ * @dirty: if true, pages will be marked as dirty
+ * @accessed: if true, the pages will be marked as accessed
+ */
+void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+		bool dirty, bool accessed)
+{
+	int i, npages;
+
+	/* We already BUG_ON() for non-page-aligned sizes in
+	 * drm_gem_object_init(), so we should never hit this unless
+	 * driver author is doing something really wrong:
+	 */
+	WARN_ON((obj->size & (PAGE_SIZE - 1)) != 0);
+
+	npages = obj->size >> PAGE_SHIFT;
+
+	for (i = 0; i < npages; i++) {
+		if (dirty)
+			set_page_dirty(pages[i]);
+
+		if (accessed)
+			mark_page_accessed(pages[i]);
+
+		/* Undo the reference we took when populating the table */
+		page_cache_release(pages[i]);
+	}
+
+	drm_free_large(pages);
+}
+EXPORT_SYMBOL(drm_gem_put_pages);
+
 /** Returns a reference to the object named by the handle. */
 struct drm_gem_object *
 drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp,
@@ -445,8 +617,14 @@
 	if (obj == NULL)
 		return -ENOENT;
 
+	mutex_lock(&dev->object_name_lock);
 	idr_preload(GFP_KERNEL);
-	spin_lock(&dev->object_name_lock);
+	/* prevent races with concurrent gem_close. */
+	if (obj->handle_count == 0) {
+		ret = -ENOENT;
+		goto err;
+	}
+
 	if (!obj->name) {
 		ret = idr_alloc(&dev->object_name_idr, obj, 1, 0, GFP_NOWAIT);
 		if (ret < 0)
@@ -462,8 +640,8 @@
 	ret = 0;
 
 err:
-	spin_unlock(&dev->object_name_lock);
 	idr_preload_end();
+	mutex_unlock(&dev->object_name_lock);
 	drm_gem_object_unreference_unlocked(obj);
 	return ret;
 }
@@ -486,15 +664,17 @@
 	if (!(dev->driver->driver_features & DRIVER_GEM))
 		return -ENODEV;
 
-	spin_lock(&dev->object_name_lock);
+	mutex_lock(&dev->object_name_lock);
 	obj = idr_find(&dev->object_name_idr, (int) args->name);
-	if (obj)
+	if (obj) {
 		drm_gem_object_reference(obj);
-	spin_unlock(&dev->object_name_lock);
-	if (!obj)
+	} else {
+		mutex_unlock(&dev->object_name_lock);
 		return -ENOENT;
+	}
 
-	ret = drm_gem_handle_create(file_priv, obj, &handle);
+	/* drm_gem_handle_create_tail unlocks dev->object_name_lock. */
+	ret = drm_gem_handle_create_tail(file_priv, obj, &handle);
 	drm_gem_object_unreference_unlocked(obj);
 	if (ret)
 		return ret;
@@ -527,7 +707,9 @@
 	struct drm_gem_object *obj = ptr;
 	struct drm_device *dev = obj->dev;
 
-	drm_gem_remove_prime_handles(obj, file_priv);
+	if (drm_core_check_feature(dev, DRIVER_PRIME))
+		drm_gem_remove_prime_handles(obj, file_priv);
+	drm_vma_node_revoke(&obj->vma_node, file_priv->filp);
 
 	if (dev->driver->gem_close_object)
 		dev->driver->gem_close_object(obj, file_priv);
@@ -553,6 +735,8 @@
 void
 drm_gem_object_release(struct drm_gem_object *obj)
 {
+	WARN_ON(obj->dma_buf);
+
 	if (obj->filp)
 	    fput(obj->filp);
 }
@@ -577,41 +761,6 @@
 }
 EXPORT_SYMBOL(drm_gem_object_free);
 
-static void drm_gem_object_ref_bug(struct kref *list_kref)
-{
-	BUG();
-}
-
-/**
- * Called after the last handle to the object has been closed
- *
- * Removes any name for the object. Note that this must be
- * called before drm_gem_object_free or we'll be touching
- * freed memory
- */
-void drm_gem_object_handle_free(struct drm_gem_object *obj)
-{
-	struct drm_device *dev = obj->dev;
-
-	/* Remove any name for this object */
-	spin_lock(&dev->object_name_lock);
-	if (obj->name) {
-		idr_remove(&dev->object_name_idr, obj->name);
-		obj->name = 0;
-		spin_unlock(&dev->object_name_lock);
-		/*
-		 * The object name held a reference to this object, drop
-		 * that now.
-		*
-		* This cannot be the last reference, since the handle holds one too.
-		 */
-		kref_put(&obj->refcount, drm_gem_object_ref_bug);
-	} else
-		spin_unlock(&dev->object_name_lock);
-
-}
-EXPORT_SYMBOL(drm_gem_object_handle_free);
-
 void drm_gem_vm_open(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
@@ -653,6 +802,10 @@
  * the GEM object is not looked up based on its fake offset. To implement the
  * DRM mmap operation, drivers should use the drm_gem_mmap() function.
  *
+ * drm_gem_mmap_obj() assumes the user is granted access to the buffer while
+ * drm_gem_mmap() prevents unprivileged users from mapping random objects. So
+ * callers must verify access restrictions before calling this helper.
+ *
  * NOTE: This function has to be protected with dev->struct_mutex
  *
  * Return 0 or success or -EINVAL if the object size is smaller than the VMA
@@ -701,14 +854,17 @@
  * Look up the GEM object based on the offset passed in (vma->vm_pgoff will
  * contain the fake offset we created when the GTT map ioctl was called on
  * the object) and map it with a call to drm_gem_mmap_obj().
+ *
+ * If the caller is not granted access to the buffer object, the mmap will fail
+ * with EACCES. Please see the vma manager for more information.
  */
 int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_file *priv = filp->private_data;
 	struct drm_device *dev = priv->minor->dev;
 	struct drm_gem_mm *mm = dev->mm_private;
-	struct drm_local_map *map = NULL;
-	struct drm_hash_item *hash;
+	struct drm_gem_object *obj;
+	struct drm_vma_offset_node *node;
 	int ret = 0;
 
 	if (drm_device_is_unplugged(dev))
@@ -716,21 +872,19 @@
 
 	mutex_lock(&dev->struct_mutex);
 
-	if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
+	node = drm_vma_offset_exact_lookup(&mm->vma_manager, vma->vm_pgoff,
+					   vma_pages(vma));
+	if (!node) {
 		mutex_unlock(&dev->struct_mutex);
 		return drm_mmap(filp, vma);
+	} else if (!drm_vma_node_is_allowed(node, filp)) {
+		mutex_unlock(&dev->struct_mutex);
+		return -EACCES;
 	}
 
-	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
-	if (!map ||
-	    ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
-		ret =  -EPERM;
-		goto out_unlock;
-	}
+	obj = container_of(node, struct drm_gem_object, vma_node);
+	ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, vma);
 
-	ret = drm_gem_mmap_obj(map->handle, map->size, vma);
-
-out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 
 	return ret;
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index ece72a8..0a4f805 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -27,11 +27,7 @@
 #include <drm/drmP.h>
 #include <drm/drm.h>
 #include <drm/drm_gem_cma_helper.h>
-
-static unsigned int get_gem_mmap_offset(struct drm_gem_object *obj)
-{
-	return (unsigned int)obj->map_list.hash.key << PAGE_SHIFT;
-}
+#include <drm/drm_vma_manager.h>
 
 /*
  * __drm_gem_cma_create - Create a GEM CMA object without allocating memory
@@ -172,8 +168,7 @@
 {
 	struct drm_gem_cma_object *cma_obj;
 
-	if (gem_obj->map_list.map)
-		drm_gem_free_mmap_offset(gem_obj);
+	drm_gem_free_mmap_offset(gem_obj);
 
 	cma_obj = to_drm_gem_cma_obj(gem_obj);
 
@@ -237,7 +232,7 @@
 		return -EINVAL;
 	}
 
-	*offset = get_gem_mmap_offset(gem_obj);
+	*offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
 
 	drm_gem_object_unreference(gem_obj);
 
@@ -286,27 +281,16 @@
 }
 EXPORT_SYMBOL_GPL(drm_gem_cma_mmap);
 
-/*
- * drm_gem_cma_dumb_destroy - (struct drm_driver)->dumb_destroy callback function
- */
-int drm_gem_cma_dumb_destroy(struct drm_file *file_priv,
-		struct drm_device *drm, unsigned int handle)
-{
-	return drm_gem_handle_delete(file_priv, handle);
-}
-EXPORT_SYMBOL_GPL(drm_gem_cma_dumb_destroy);
-
 #ifdef CONFIG_DEBUG_FS
 void drm_gem_cma_describe(struct drm_gem_cma_object *cma_obj, struct seq_file *m)
 {
 	struct drm_gem_object *obj = &cma_obj->base;
 	struct drm_device *dev = obj->dev;
-	uint64_t off = 0;
+	uint64_t off;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	if (obj->map_list.map)
-		off = (uint64_t)obj->map_list.hash.key;
+	off = drm_vma_node_start(&obj->vma_node);
 
 	seq_printf(m, "%2d (%2d) %08llx %08Zx %p %d",
 			obj->name, obj->refcount.refcount.counter,
diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index d4b20ce..5329832 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -207,7 +207,7 @@
 
 	seq_printf(m, "%6d %8zd %7d %8d\n",
 		   obj->name, obj->size,
-		   atomic_read(&obj->handle_count),
+		   obj->handle_count,
 		   atomic_read(&obj->refcount.refcount));
 	return 0;
 }
@@ -218,7 +218,11 @@
 	struct drm_device *dev = node->minor->dev;
 
 	seq_printf(m, "  name     size handles refcount\n");
+
+	mutex_lock(&dev->object_name_lock);
 	idr_for_each(&dev->object_name_idr, drm_gem_one_name_info, m);
+	mutex_unlock(&dev->object_name_lock);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index ffd7a7b..07247e2 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -217,29 +217,30 @@
 		  struct drm_file *file_priv)
 {
 	struct drm_client *client = data;
-	struct drm_file *pt;
-	int idx;
-	int i;
 
-	idx = client->idx;
-	i = 0;
+	/*
+	 * Hollowed-out getclient ioctl to keep some dead old drm tests/tools
+	 * not breaking completely. Userspace tools stop enumerating one they
+	 * get -EINVAL, hence this is the return value we need to hand back for
+	 * no clients tracked.
+	 *
+	 * Unfortunately some clients (*cough* libva *cough*) use this in a fun
+	 * attempt to figure out whether they're authenticated or not. Since
+	 * that's the only thing they care about, give it to the directly
+	 * instead of walking one giant list.
+	 */
+	if (client->idx == 0) {
+		client->auth = file_priv->authenticated;
+		client->pid = pid_vnr(file_priv->pid);
+		client->uid = from_kuid_munged(current_user_ns(),
+					       file_priv->uid);
+		client->magic = 0;
+		client->iocs = 0;
 
-	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(pt, &dev->filelist, lhead) {
-		if (i++ >= idx) {
-			client->auth = pt->authenticated;
-			client->pid = pid_vnr(pt->pid);
-			client->uid = from_kuid_munged(current_user_ns(), pt->uid);
-			client->magic = pt->magic;
-			client->iocs = pt->ioctl_count;
-			mutex_unlock(&dev->struct_mutex);
-
-			return 0;
-		}
+		return 0;
+	} else {
+		return -EINVAL;
 	}
-	mutex_unlock(&dev->struct_mutex);
-
-	return -EINVAL;
 }
 
 /**
@@ -256,21 +257,10 @@
 		 struct drm_file *file_priv)
 {
 	struct drm_stats *stats = data;
-	int i;
 
+	/* Clear stats to prevent userspace from eating its stack garbage. */
 	memset(stats, 0, sizeof(*stats));
 
-	for (i = 0; i < dev->counters; i++) {
-		if (dev->types[i] == _DRM_STAT_LOCK)
-			stats->data[i].value =
-			    (file_priv->master->lock.hw_lock ? file_priv->master->lock.hw_lock->lock : 0);
-		else
-			stats->data[i].value = atomic_read(&dev->counts[i]);
-		stats->data[i].type = dev->types[i];
-	}
-
-	stats->count = dev->counters;
-
 	return 0;
 }
 
@@ -303,6 +293,9 @@
 	case DRM_CAP_TIMESTAMP_MONOTONIC:
 		req->value = drm_timestamp_monotonic;
 		break;
+	case DRM_CAP_ASYNC_PAGE_FLIP:
+		req->value = dev->mode_config.async_page_flip;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -352,9 +345,6 @@
 			retcode = -EINVAL;
 			goto done;
 		}
-
-		if (dev->driver->set_version)
-			dev->driver->set_version(dev, sv);
 	}
 
 done:
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 8bcce78..f92da0a 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -708,7 +708,10 @@
 	/* Subtract time delta from raw timestamp to get final
 	 * vblank_time timestamp for end of vblank.
 	 */
-	etime = ktime_sub_ns(etime, delta_ns);
+	if (delta_ns < 0)
+		etime = ktime_add_ns(etime, -delta_ns);
+	else
+		etime = ktime_sub_ns(etime, delta_ns);
 	*vblank_time = ktime_to_timeval(etime);
 
 	DRM_DEBUG("crtc %d : v %d p(%d,%d)@ %ld.%ld -> %ld.%ld [e %d us, %d rep]\n",
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 126d50e..64e44fa 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -86,7 +86,6 @@
 {
 	agp_free_memory(handle);
 }
-EXPORT_SYMBOL(drm_free_agp);
 
 /** Wrapper around agp_bind_memory() */
 int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start)
@@ -99,7 +98,6 @@
 {
 	return agp_unbind_memory(handle);
 }
-EXPORT_SYMBOL(drm_unbind_agp);
 
 #else  /*  __OS_HAS_AGP  */
 static inline void *agp_remap(unsigned long offset, unsigned long size,
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 543b9b3..af93cc5 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -49,58 +49,18 @@
 
 #define MM_UNUSED_TARGET 4
 
-static struct drm_mm_node *drm_mm_kmalloc(struct drm_mm *mm, int atomic)
-{
-	struct drm_mm_node *child;
-
-	if (atomic)
-		child = kzalloc(sizeof(*child), GFP_ATOMIC);
-	else
-		child = kzalloc(sizeof(*child), GFP_KERNEL);
-
-	if (unlikely(child == NULL)) {
-		spin_lock(&mm->unused_lock);
-		if (list_empty(&mm->unused_nodes))
-			child = NULL;
-		else {
-			child =
-			    list_entry(mm->unused_nodes.next,
-				       struct drm_mm_node, node_list);
-			list_del(&child->node_list);
-			--mm->num_unused;
-		}
-		spin_unlock(&mm->unused_lock);
-	}
-	return child;
-}
-
-/* drm_mm_pre_get() - pre allocate drm_mm_node structure
- * drm_mm:	memory manager struct we are pre-allocating for
- *
- * Returns 0 on success or -ENOMEM if allocation fails.
- */
-int drm_mm_pre_get(struct drm_mm *mm)
-{
-	struct drm_mm_node *node;
-
-	spin_lock(&mm->unused_lock);
-	while (mm->num_unused < MM_UNUSED_TARGET) {
-		spin_unlock(&mm->unused_lock);
-		node = kzalloc(sizeof(*node), GFP_KERNEL);
-		spin_lock(&mm->unused_lock);
-
-		if (unlikely(node == NULL)) {
-			int ret = (mm->num_unused < 2) ? -ENOMEM : 0;
-			spin_unlock(&mm->unused_lock);
-			return ret;
-		}
-		++mm->num_unused;
-		list_add_tail(&node->node_list, &mm->unused_nodes);
-	}
-	spin_unlock(&mm->unused_lock);
-	return 0;
-}
-EXPORT_SYMBOL(drm_mm_pre_get);
+static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
+						unsigned long size,
+						unsigned alignment,
+						unsigned long color,
+						enum drm_mm_search_flags flags);
+static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
+						unsigned long size,
+						unsigned alignment,
+						unsigned long color,
+						unsigned long start,
+						unsigned long end,
+						enum drm_mm_search_flags flags);
 
 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 				 struct drm_mm_node *node,
@@ -147,33 +107,27 @@
 	}
 }
 
-struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
-					unsigned long start,
-					unsigned long size,
-					bool atomic)
+int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 {
-	struct drm_mm_node *hole, *node;
-	unsigned long end = start + size;
+	struct drm_mm_node *hole;
+	unsigned long end = node->start + node->size;
 	unsigned long hole_start;
 	unsigned long hole_end;
 
+	BUG_ON(node == NULL);
+
+	/* Find the relevant hole to add our node to */
 	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
-		if (hole_start > start || hole_end < end)
+		if (hole_start > node->start || hole_end < end)
 			continue;
 
-		node = drm_mm_kmalloc(mm, atomic);
-		if (unlikely(node == NULL))
-			return NULL;
-
-		node->start = start;
-		node->size = size;
 		node->mm = mm;
 		node->allocated = 1;
 
 		INIT_LIST_HEAD(&node->hole_stack);
 		list_add(&node->node_list, &hole->node_list);
 
-		if (start == hole_start) {
+		if (node->start == hole_start) {
 			hole->hole_follows = 0;
 			list_del_init(&hole->hole_stack);
 		}
@@ -184,31 +138,14 @@
 			node->hole_follows = 1;
 		}
 
-		return node;
+		return 0;
 	}
 
-	WARN(1, "no hole found for block 0x%lx + 0x%lx\n", start, size);
-	return NULL;
+	WARN(1, "no hole found for node 0x%lx + 0x%lx\n",
+	     node->start, node->size);
+	return -ENOSPC;
 }
-EXPORT_SYMBOL(drm_mm_create_block);
-
-struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *hole_node,
-					     unsigned long size,
-					     unsigned alignment,
-					     unsigned long color,
-					     int atomic)
-{
-	struct drm_mm_node *node;
-
-	node = drm_mm_kmalloc(hole_node->mm, atomic);
-	if (unlikely(node == NULL))
-		return NULL;
-
-	drm_mm_insert_helper(hole_node, node, size, alignment, color);
-
-	return node;
-}
-EXPORT_SYMBOL(drm_mm_get_block_generic);
+EXPORT_SYMBOL(drm_mm_reserve_node);
 
 /**
  * Search for free space and insert a preallocated memory node. Returns
@@ -217,12 +154,13 @@
  */
 int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
 			       unsigned long size, unsigned alignment,
-			       unsigned long color)
+			       unsigned long color,
+			       enum drm_mm_search_flags flags)
 {
 	struct drm_mm_node *hole_node;
 
 	hole_node = drm_mm_search_free_generic(mm, size, alignment,
-					       color, 0);
+					       color, flags);
 	if (!hole_node)
 		return -ENOSPC;
 
@@ -231,13 +169,6 @@
 }
 EXPORT_SYMBOL(drm_mm_insert_node_generic);
 
-int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node,
-		       unsigned long size, unsigned alignment)
-{
-	return drm_mm_insert_node_generic(mm, node, size, alignment, 0);
-}
-EXPORT_SYMBOL(drm_mm_insert_node);
-
 static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 				       struct drm_mm_node *node,
 				       unsigned long size, unsigned alignment,
@@ -290,27 +221,6 @@
 	}
 }
 
-struct drm_mm_node *drm_mm_get_block_range_generic(struct drm_mm_node *hole_node,
-						unsigned long size,
-						unsigned alignment,
-						unsigned long color,
-						unsigned long start,
-						unsigned long end,
-						int atomic)
-{
-	struct drm_mm_node *node;
-
-	node = drm_mm_kmalloc(hole_node->mm, atomic);
-	if (unlikely(node == NULL))
-		return NULL;
-
-	drm_mm_insert_helper_range(hole_node, node, size, alignment, color,
-				   start, end);
-
-	return node;
-}
-EXPORT_SYMBOL(drm_mm_get_block_range_generic);
-
 /**
  * Search for free space and insert a preallocated memory node. Returns
  * -ENOSPC if no suitable free area is available. This is for range
@@ -318,13 +228,14 @@
  */
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
 					unsigned long size, unsigned alignment, unsigned long color,
-					unsigned long start, unsigned long end)
+					unsigned long start, unsigned long end,
+					enum drm_mm_search_flags flags)
 {
 	struct drm_mm_node *hole_node;
 
 	hole_node = drm_mm_search_free_in_range_generic(mm,
 							size, alignment, color,
-							start, end, 0);
+							start, end, flags);
 	if (!hole_node)
 		return -ENOSPC;
 
@@ -335,14 +246,6 @@
 }
 EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);
 
-int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node,
-				unsigned long size, unsigned alignment,
-				unsigned long start, unsigned long end)
-{
-	return drm_mm_insert_node_in_range_generic(mm, node, size, alignment, 0, start, end);
-}
-EXPORT_SYMBOL(drm_mm_insert_node_in_range);
-
 /**
  * Remove a memory node from the allocator.
  */
@@ -351,6 +254,9 @@
 	struct drm_mm *mm = node->mm;
 	struct drm_mm_node *prev_node;
 
+	if (WARN_ON(!node->allocated))
+		return;
+
 	BUG_ON(node->scanned_block || node->scanned_prev_free
 				   || node->scanned_next_free);
 
@@ -377,28 +283,6 @@
 }
 EXPORT_SYMBOL(drm_mm_remove_node);
 
-/*
- * Remove a memory node from the allocator and free the allocated struct
- * drm_mm_node. Only to be used on a struct drm_mm_node obtained by one of the
- * drm_mm_get_block functions.
- */
-void drm_mm_put_block(struct drm_mm_node *node)
-{
-
-	struct drm_mm *mm = node->mm;
-
-	drm_mm_remove_node(node);
-
-	spin_lock(&mm->unused_lock);
-	if (mm->num_unused < MM_UNUSED_TARGET) {
-		list_add(&node->node_list, &mm->unused_nodes);
-		++mm->num_unused;
-	} else
-		kfree(node);
-	spin_unlock(&mm->unused_lock);
-}
-EXPORT_SYMBOL(drm_mm_put_block);
-
 static int check_free_hole(unsigned long start, unsigned long end,
 			   unsigned long size, unsigned alignment)
 {
@@ -414,11 +298,11 @@
 	return end >= start + size;
 }
 
-struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
-					       unsigned long size,
-					       unsigned alignment,
-					       unsigned long color,
-					       bool best_match)
+static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
+						      unsigned long size,
+						      unsigned alignment,
+						      unsigned long color,
+						      enum drm_mm_search_flags flags)
 {
 	struct drm_mm_node *entry;
 	struct drm_mm_node *best;
@@ -441,7 +325,7 @@
 		if (!check_free_hole(adj_start, adj_end, size, alignment))
 			continue;
 
-		if (!best_match)
+		if (!(flags & DRM_MM_SEARCH_BEST))
 			return entry;
 
 		if (entry->size < best_size) {
@@ -452,15 +336,14 @@
 
 	return best;
 }
-EXPORT_SYMBOL(drm_mm_search_free_generic);
 
-struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
+static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
 							unsigned long size,
 							unsigned alignment,
 							unsigned long color,
 							unsigned long start,
 							unsigned long end,
-							bool best_match)
+							enum drm_mm_search_flags flags)
 {
 	struct drm_mm_node *entry;
 	struct drm_mm_node *best;
@@ -488,7 +371,7 @@
 		if (!check_free_hole(adj_start, adj_end, size, alignment))
 			continue;
 
-		if (!best_match)
+		if (!(flags & DRM_MM_SEARCH_BEST))
 			return entry;
 
 		if (entry->size < best_size) {
@@ -499,7 +382,6 @@
 
 	return best;
 }
-EXPORT_SYMBOL(drm_mm_search_free_in_range_generic);
 
 /**
  * Moves an allocation. To be used with embedded struct drm_mm_node.
@@ -634,8 +516,8 @@
  * corrupted.
  *
  * When the scan list is empty, the selected memory nodes can be freed. An
- * immediately following drm_mm_search_free with best_match = 0 will then return
- * the just freed block (because its at the top of the free_stack list).
+ * immediately following drm_mm_search_free with !DRM_MM_SEARCH_BEST will then
+ * return the just freed block (because its at the top of the free_stack list).
  *
  * Returns one if this block should be evicted, zero otherwise. Will always
  * return zero when no hole has been found.
@@ -672,10 +554,7 @@
 void drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size)
 {
 	INIT_LIST_HEAD(&mm->hole_stack);
-	INIT_LIST_HEAD(&mm->unused_nodes);
-	mm->num_unused = 0;
 	mm->scanned_blocks = 0;
-	spin_lock_init(&mm->unused_lock);
 
 	/* Clever trick to avoid a special case in the free hole tracking. */
 	INIT_LIST_HEAD(&mm->head_node.node_list);
@@ -695,22 +574,8 @@
 
 void drm_mm_takedown(struct drm_mm * mm)
 {
-	struct drm_mm_node *entry, *next;
-
-	if (WARN(!list_empty(&mm->head_node.node_list),
-		 "Memory manager not clean. Delaying takedown\n")) {
-		return;
-	}
-
-	spin_lock(&mm->unused_lock);
-	list_for_each_entry_safe(entry, next, &mm->unused_nodes, node_list) {
-		list_del(&entry->node_list);
-		kfree(entry);
-		--mm->num_unused;
-	}
-	spin_unlock(&mm->unused_lock);
-
-	BUG_ON(mm->num_unused != 0);
+	WARN(!list_empty(&mm->head_node.node_list),
+	     "Memory manager not clean during takedown.\n");
 }
 EXPORT_SYMBOL(drm_mm_takedown);
 
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index a6729bf..fc2adb6 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -596,27 +596,6 @@
 EXPORT_SYMBOL(drm_mode_set_name);
 
 /**
- * drm_mode_list_concat - move modes from one list to another
- * @head: source list
- * @new: dst list
- *
- * LOCKING:
- * Caller must ensure both lists are locked.
- *
- * Move all the modes from @head to @new.
- */
-void drm_mode_list_concat(struct list_head *head, struct list_head *new)
-{
-
-	struct list_head *entry, *tmp;
-
-	list_for_each_safe(entry, tmp, head) {
-		list_move_tail(entry, new);
-	}
-}
-EXPORT_SYMBOL(drm_mode_list_concat);
-
-/**
  * drm_mode_width - get the width of a mode
  * @mode: mode
  *
@@ -923,43 +902,6 @@
 EXPORT_SYMBOL(drm_mode_validate_size);
 
 /**
- * drm_mode_validate_clocks - validate modes against clock limits
- * @dev: DRM device
- * @mode_list: list of modes to check
- * @min: minimum clock rate array
- * @max: maximum clock rate array
- * @n_ranges: number of clock ranges (size of arrays)
- *
- * LOCKING:
- * Caller must hold a lock protecting @mode_list.
- *
- * Some code may need to check a mode list against the clock limits of the
- * device in question.  This function walks the mode list, testing to make
- * sure each mode falls within a given range (defined by @min and @max
- * arrays) and sets @mode->status as needed.
- */
-void drm_mode_validate_clocks(struct drm_device *dev,
-			      struct list_head *mode_list,
-			      int *min, int *max, int n_ranges)
-{
-	struct drm_display_mode *mode;
-	int i;
-
-	list_for_each_entry(mode, mode_list, head) {
-		bool good = false;
-		for (i = 0; i < n_ranges; i++) {
-			if (mode->clock >= min[i] && mode->clock <= max[i]) {
-				good = true;
-				break;
-			}
-		}
-		if (!good)
-			mode->status = MODE_CLOCK_RANGE;
-	}
-}
-EXPORT_SYMBOL(drm_mode_validate_clocks);
-
-/**
  * drm_mode_prune_invalid - remove invalid modes from mode list
  * @dev: DRM device
  * @mode_list: list of modes to check
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 80c0b2b..1f96cee 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -52,10 +52,8 @@
 drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align)
 {
 	drm_dma_handle_t *dmah;
-#if 1
 	unsigned long addr;
 	size_t sz;
-#endif
 
 	/* pci_alloc_consistent only guarantees alignment to the smallest
 	 * PAGE_SIZE order which is greater than or equal to the requested size.
@@ -97,10 +95,8 @@
  */
 void __drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
 {
-#if 1
 	unsigned long addr;
 	size_t sz;
-#endif
 
 	if (dmah->vaddr) {
 		/* XXX - Is virt_to_page() legal for consistent mem? */
@@ -276,17 +272,26 @@
 			DRM_ERROR("Cannot initialize the agpgart module.\n");
 			return -EINVAL;
 		}
-		if (drm_core_has_MTRR(dev)) {
-			if (dev->agp)
-				dev->agp->agp_mtrr = arch_phys_wc_add(
-					dev->agp->agp_info.aper_base,
-					dev->agp->agp_info.aper_size *
-					1024 * 1024);
+		if (dev->agp) {
+			dev->agp->agp_mtrr = arch_phys_wc_add(
+				dev->agp->agp_info.aper_base,
+				dev->agp->agp_info.aper_size *
+				1024 * 1024);
 		}
 	}
 	return 0;
 }
 
+static void drm_pci_agp_destroy(struct drm_device *dev)
+{
+	if (drm_core_has_AGP(dev) && dev->agp) {
+		arch_phys_wc_del(dev->agp->agp_mtrr);
+		drm_agp_clear(dev);
+		drm_agp_destroy(dev->agp);
+		dev->agp = NULL;
+	}
+}
+
 static struct drm_bus drm_pci_bus = {
 	.bus_type = DRIVER_BUS_PCI,
 	.get_irq = drm_pci_get_irq,
@@ -295,6 +300,7 @@
 	.set_unique = drm_pci_set_unique,
 	.irq_by_busid = drm_pci_irq_by_busid,
 	.agp_init = drm_pci_agp_init,
+	.agp_destroy = drm_pci_agp_destroy,
 };
 
 /**
@@ -348,6 +354,12 @@
 			goto err_g2;
 	}
 
+	if (drm_core_check_feature(dev, DRIVER_RENDER) && drm_rnodes) {
+		ret = drm_get_minor(dev, &dev->render, DRM_MINOR_RENDER);
+		if (ret)
+			goto err_g21;
+	}
+
 	if ((ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY)))
 		goto err_g3;
 
@@ -377,6 +389,9 @@
 err_g4:
 	drm_put_minor(&dev->primary);
 err_g3:
+	if (dev->render)
+		drm_put_minor(&dev->render);
+err_g21:
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_put_minor(&dev->control);
 err_g2:
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index b8a282e..f7a18c6 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@@ -28,7 +28,7 @@
 #include <linux/export.h>
 #include <drm/drmP.h>
 
-/**
+/*
  * Register.
  *
  * \param platdev - Platform device struture
@@ -39,8 +39,8 @@
  * Try and register, if we fail to register, backout previous work.
  */
 
-int drm_get_platform_dev(struct platform_device *platdev,
-			 struct drm_driver *driver)
+static int drm_get_platform_dev(struct platform_device *platdev,
+				struct drm_driver *driver)
 {
 	struct drm_device *dev;
 	int ret;
@@ -69,6 +69,12 @@
 			goto err_g1;
 	}
 
+	if (drm_core_check_feature(dev, DRIVER_RENDER) && drm_rnodes) {
+		ret = drm_get_minor(dev, &dev->render, DRM_MINOR_RENDER);
+		if (ret)
+			goto err_g11;
+	}
+
 	ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY);
 	if (ret)
 		goto err_g2;
@@ -100,6 +106,9 @@
 err_g3:
 	drm_put_minor(&dev->primary);
 err_g2:
+	if (dev->render)
+		drm_put_minor(&dev->render);
+err_g11:
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_put_minor(&dev->control);
 err_g1:
@@ -107,7 +116,6 @@
 	mutex_unlock(&drm_global_mutex);
 	return ret;
 }
-EXPORT_SYMBOL(drm_get_platform_dev);
 
 static int drm_platform_get_irq(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 85e450e..276d470 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -83,6 +83,34 @@
 	return 0;
 }
 
+static struct dma_buf *drm_prime_lookup_buf_by_handle(struct drm_prime_file_private *prime_fpriv,
+						      uint32_t handle)
+{
+	struct drm_prime_member *member;
+
+	list_for_each_entry(member, &prime_fpriv->head, entry) {
+		if (member->handle == handle)
+			return member->dma_buf;
+	}
+
+	return NULL;
+}
+
+static int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpriv,
+				       struct dma_buf *dma_buf,
+				       uint32_t *handle)
+{
+	struct drm_prime_member *member;
+
+	list_for_each_entry(member, &prime_fpriv->head, entry) {
+		if (member->dma_buf == dma_buf) {
+			*handle = member->handle;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
 static int drm_gem_map_attach(struct dma_buf *dma_buf,
 			      struct device *target_dev,
 			      struct dma_buf_attachment *attach)
@@ -131,9 +159,8 @@
 	attach->priv = NULL;
 }
 
-static void drm_prime_remove_buf_handle_locked(
-		struct drm_prime_file_private *prime_fpriv,
-		struct dma_buf *dma_buf)
+void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
+					struct dma_buf *dma_buf)
 {
 	struct drm_prime_member *member, *safe;
 
@@ -167,8 +194,6 @@
 	if (WARN_ON(prime_attach->dir != DMA_NONE))
 		return ERR_PTR(-EBUSY);
 
-	mutex_lock(&obj->dev->struct_mutex);
-
 	sgt = obj->dev->driver->gem_prime_get_sg_table(obj);
 
 	if (!IS_ERR(sgt)) {
@@ -182,7 +207,6 @@
 		}
 	}
 
-	mutex_unlock(&obj->dev->struct_mutex);
 	return sgt;
 }
 
@@ -192,16 +216,14 @@
 	/* nothing to be done here */
 }
 
-static void drm_gem_dmabuf_release(struct dma_buf *dma_buf)
+void drm_gem_dmabuf_release(struct dma_buf *dma_buf)
 {
 	struct drm_gem_object *obj = dma_buf->priv;
 
-	if (obj->export_dma_buf == dma_buf) {
-		/* drop the reference on the export fd holds */
-		obj->export_dma_buf = NULL;
-		drm_gem_object_unreference_unlocked(obj);
-	}
+	/* drop the reference on the export fd holds */
+	drm_gem_object_unreference_unlocked(obj);
 }
+EXPORT_SYMBOL(drm_gem_dmabuf_release);
 
 static void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
@@ -300,62 +322,107 @@
 }
 EXPORT_SYMBOL(drm_gem_prime_export);
 
+static struct dma_buf *export_and_register_object(struct drm_device *dev,
+						  struct drm_gem_object *obj,
+						  uint32_t flags)
+{
+	struct dma_buf *dmabuf;
+
+	/* prevent races with concurrent gem_close. */
+	if (obj->handle_count == 0) {
+		dmabuf = ERR_PTR(-ENOENT);
+		return dmabuf;
+	}
+
+	dmabuf = dev->driver->gem_prime_export(dev, obj, flags);
+	if (IS_ERR(dmabuf)) {
+		/* normally the created dma-buf takes ownership of the ref,
+		 * but if that fails then drop the ref
+		 */
+		return dmabuf;
+	}
+
+	/*
+	 * Note that callers do not need to clean up the export cache
+	 * since the check for obj->handle_count guarantees that someone
+	 * will clean it up.
+	 */
+	obj->dma_buf = dmabuf;
+	get_dma_buf(obj->dma_buf);
+	/* Grab a new ref since the callers is now used by the dma-buf */
+	drm_gem_object_reference(obj);
+
+	return dmabuf;
+}
+
 int drm_gem_prime_handle_to_fd(struct drm_device *dev,
 		struct drm_file *file_priv, uint32_t handle, uint32_t flags,
 		int *prime_fd)
 {
 	struct drm_gem_object *obj;
-	void *buf;
 	int ret = 0;
 	struct dma_buf *dmabuf;
 
-	obj = drm_gem_object_lookup(dev, file_priv, handle);
-	if (!obj)
-		return -ENOENT;
-
 	mutex_lock(&file_priv->prime.lock);
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (!obj)  {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	dmabuf = drm_prime_lookup_buf_by_handle(&file_priv->prime, handle);
+	if (dmabuf) {
+		get_dma_buf(dmabuf);
+		goto out_have_handle;
+	}
+
+	mutex_lock(&dev->object_name_lock);
 	/* re-export the original imported object */
 	if (obj->import_attach) {
 		dmabuf = obj->import_attach->dmabuf;
+		get_dma_buf(dmabuf);
 		goto out_have_obj;
 	}
 
-	if (obj->export_dma_buf) {
-		dmabuf = obj->export_dma_buf;
+	if (obj->dma_buf) {
+		get_dma_buf(obj->dma_buf);
+		dmabuf = obj->dma_buf;
 		goto out_have_obj;
 	}
 
-	buf = dev->driver->gem_prime_export(dev, obj, flags);
-	if (IS_ERR(buf)) {
+	dmabuf = export_and_register_object(dev, obj, flags);
+	if (IS_ERR(dmabuf)) {
 		/* normally the created dma-buf takes ownership of the ref,
 		 * but if that fails then drop the ref
 		 */
-		ret = PTR_ERR(buf);
+		ret = PTR_ERR(dmabuf);
+		mutex_unlock(&dev->object_name_lock);
 		goto out;
 	}
-	obj->export_dma_buf = buf;
 
-	/* if we've exported this buffer the cheat and add it to the import list
-	 * so we get the correct handle back
+out_have_obj:
+	/*
+	 * If we've exported this buffer then cheat and add it to the import list
+	 * so we get the correct handle back. We must do this under the
+	 * protection of dev->object_name_lock to ensure that a racing gem close
+	 * ioctl doesn't miss to remove this buffer handle from the cache.
 	 */
 	ret = drm_prime_add_buf_handle(&file_priv->prime,
-				       obj->export_dma_buf, handle);
+				       dmabuf, handle);
+	mutex_unlock(&dev->object_name_lock);
 	if (ret)
 		goto fail_put_dmabuf;
 
-	ret = dma_buf_fd(buf, flags);
-	if (ret < 0)
-		goto fail_rm_handle;
-
-	*prime_fd = ret;
-	mutex_unlock(&file_priv->prime.lock);
-	return 0;
-
-out_have_obj:
-	get_dma_buf(dmabuf);
+out_have_handle:
 	ret = dma_buf_fd(dmabuf, flags);
+	/*
+	 * We must _not_ remove the buffer from the handle cache since the newly
+	 * created dma buf is already linked in the global obj->dma_buf pointer,
+	 * and that is invariant as long as a userspace gem handle exists.
+	 * Closing the handle will clean out the cache anyway, so we don't leak.
+	 */
 	if (ret < 0) {
-		dma_buf_put(dmabuf);
+		goto fail_put_dmabuf;
 	} else {
 		*prime_fd = ret;
 		ret = 0;
@@ -363,15 +430,13 @@
 
 	goto out;
 
-fail_rm_handle:
-	drm_prime_remove_buf_handle_locked(&file_priv->prime, buf);
 fail_put_dmabuf:
-	/* clear NOT to be checked when releasing dma_buf */
-	obj->export_dma_buf = NULL;
-	dma_buf_put(buf);
+	dma_buf_put(dmabuf);
 out:
 	drm_gem_object_unreference_unlocked(obj);
+out_unlock:
 	mutex_unlock(&file_priv->prime.lock);
+
 	return ret;
 }
 EXPORT_SYMBOL(drm_gem_prime_handle_to_fd);
@@ -446,19 +511,26 @@
 
 	ret = drm_prime_lookup_buf_handle(&file_priv->prime,
 			dma_buf, handle);
-	if (!ret) {
-		ret = 0;
+	if (ret == 0)
 		goto out_put;
-	}
 
 	/* never seen this one, need to import */
+	mutex_lock(&dev->object_name_lock);
 	obj = dev->driver->gem_prime_import(dev, dma_buf);
 	if (IS_ERR(obj)) {
 		ret = PTR_ERR(obj);
-		goto out_put;
+		goto out_unlock;
 	}
 
-	ret = drm_gem_handle_create(file_priv, obj, handle);
+	if (obj->dma_buf) {
+		WARN_ON(obj->dma_buf != dma_buf);
+	} else {
+		obj->dma_buf = dma_buf;
+		get_dma_buf(dma_buf);
+	}
+
+	/* drm_gem_handle_create_tail unlocks dev->object_name_lock. */
+	ret = drm_gem_handle_create_tail(file_priv, obj, handle);
 	drm_gem_object_unreference_unlocked(obj);
 	if (ret)
 		goto out_put;
@@ -478,7 +550,9 @@
 	/* hmm, if driver attached, we are relying on the free-object path
 	 * to detach.. which seems ok..
 	 */
-	drm_gem_object_handle_unreference_unlocked(obj);
+	drm_gem_handle_delete(file_priv, *handle);
+out_unlock:
+	mutex_unlock(&dev->object_name_lock);
 out_put:
 	dma_buf_put(dma_buf);
 	mutex_unlock(&file_priv->prime.lock);
@@ -618,25 +692,3 @@
 	WARN_ON(!list_empty(&prime_fpriv->head));
 }
 EXPORT_SYMBOL(drm_prime_destroy_file_private);
-
-int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf, uint32_t *handle)
-{
-	struct drm_prime_member *member;
-
-	list_for_each_entry(member, &prime_fpriv->head, entry) {
-		if (member->dma_buf == dma_buf) {
-			*handle = member->handle;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-EXPORT_SYMBOL(drm_prime_lookup_buf_handle);
-
-void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf)
-{
-	mutex_lock(&prime_fpriv->lock);
-	drm_prime_remove_buf_handle_locked(prime_fpriv, dma_buf);
-	mutex_unlock(&prime_fpriv->lock);
-}
-EXPORT_SYMBOL(drm_prime_remove_buf_handle);
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
deleted file mode 100644
index d7f2324..0000000
--- a/drivers/gpu/drm/drm_proc.c
+++ /dev/null
@@ -1,209 +0,0 @@
-/**
- * \file drm_proc.c
- * /proc support for DRM
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- *
- * \par Acknowledgements:
- *    Matthew J Sottek <matthew.j.sottek@intel.com> sent in a patch to fix
- *    the problem with the proc files not outputting all their information.
- */
-
-/*
- * Created: Mon Jan 11 09:48:47 1999 by faith@valinux.com
- *
- * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
- * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <linux/seq_file.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <drm/drmP.h>
-
-/***************************************************
- * Initialization, etc.
- **************************************************/
-
-/**
- * Proc file list.
- */
-static const struct drm_info_list drm_proc_list[] = {
-	{"name", drm_name_info, 0},
-	{"vm", drm_vm_info, 0},
-	{"clients", drm_clients_info, 0},
-	{"bufs", drm_bufs_info, 0},
-	{"gem_names", drm_gem_name_info, DRIVER_GEM},
-#if DRM_DEBUG_CODE
-	{"vma", drm_vma_info, 0},
-#endif
-};
-#define DRM_PROC_ENTRIES ARRAY_SIZE(drm_proc_list)
-
-static int drm_proc_open(struct inode *inode, struct file *file)
-{
-	struct drm_info_node* node = PDE_DATA(inode);
-
-	return single_open(file, node->info_ent->show, node);
-}
-
-static const struct file_operations drm_proc_fops = {
-	.owner = THIS_MODULE,
-	.open = drm_proc_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
-
-/**
- * Initialize a given set of proc files for a device
- *
- * \param files The array of files to create
- * \param count The number of files given
- * \param root DRI proc dir entry.
- * \param minor device minor number
- * \return Zero on success, non-zero on failure
- *
- * Create a given set of proc files represented by an array of
- * gdm_proc_lists in the given root directory.
- */
-static int drm_proc_create_files(const struct drm_info_list *files, int count,
-			  struct proc_dir_entry *root, struct drm_minor *minor)
-{
-	struct drm_device *dev = minor->dev;
-	struct proc_dir_entry *ent;
-	struct drm_info_node *tmp;
-	int i;
-
-	for (i = 0; i < count; i++) {
-		u32 features = files[i].driver_features;
-
-		if (features != 0 &&
-		    (dev->driver->driver_features & features) != features)
-			continue;
-
-		tmp = kmalloc(sizeof(struct drm_info_node), GFP_KERNEL);
-		if (!tmp)
-			return -1;
-
-		tmp->minor = minor;
-		tmp->info_ent = &files[i];
-		list_add(&tmp->list, &minor->proc_nodes.list);
-
-		ent = proc_create_data(files[i].name, S_IRUGO, root,
-				       &drm_proc_fops, tmp);
-		if (!ent) {
-			DRM_ERROR("Cannot create /proc/dri/%u/%s\n",
-				  minor->index, files[i].name);
-			list_del(&tmp->list);
-			kfree(tmp);
-			return -1;
-		}
-	}
-	return 0;
-}
-
-/**
- * Initialize the DRI proc filesystem for a device
- *
- * \param dev DRM device
- * \param root DRI proc dir entry.
- * \param dev_root resulting DRI device proc dir entry.
- * \return root entry pointer on success, or NULL on failure.
- *
- * Create the DRI proc root entry "/proc/dri", the device proc root entry
- * "/proc/dri/%minor%/", and each entry in proc_list as
- * "/proc/dri/%minor%/%name%".
- */
-int drm_proc_init(struct drm_minor *minor, struct proc_dir_entry *root)
-{
-	char name[12];
-	int ret;
-
-	INIT_LIST_HEAD(&minor->proc_nodes.list);
-	sprintf(name, "%u", minor->index);
-	minor->proc_root = proc_mkdir(name, root);
-	if (!minor->proc_root) {
-		DRM_ERROR("Cannot create /proc/dri/%s\n", name);
-		return -1;
-	}
-
-	ret = drm_proc_create_files(drm_proc_list, DRM_PROC_ENTRIES,
-				    minor->proc_root, minor);
-	if (ret) {
-		remove_proc_subtree(name, root);
-		minor->proc_root = NULL;
-		DRM_ERROR("Failed to create core drm proc files\n");
-		return ret;
-	}
-
-	return 0;
-}
-
-static int drm_proc_remove_files(const struct drm_info_list *files, int count,
-			  struct drm_minor *minor)
-{
-	struct list_head *pos, *q;
-	struct drm_info_node *tmp;
-	int i;
-
-	for (i = 0; i < count; i++) {
-		list_for_each_safe(pos, q, &minor->proc_nodes.list) {
-			tmp = list_entry(pos, struct drm_info_node, list);
-			if (tmp->info_ent == &files[i]) {
-				remove_proc_entry(files[i].name,
-						  minor->proc_root);
-				list_del(pos);
-				kfree(tmp);
-			}
-		}
-	}
-	return 0;
-}
-
-/**
- * Cleanup the proc filesystem resources.
- *
- * \param minor device minor number.
- * \param root DRI proc dir entry.
- * \param dev_root DRI device proc dir entry.
- * \return always zero.
- *
- * Remove all proc entries created by proc_init().
- */
-int drm_proc_cleanup(struct drm_minor *minor, struct proc_dir_entry *root)
-{
-	char name[64];
-
-	if (!root || !minor->proc_root)
-		return 0;
-
-	drm_proc_remove_files(drm_proc_list, DRM_PROC_ENTRIES, minor);
-
-	sprintf(name, "%d", minor->index);
-	remove_proc_subtree(name, root);
-	return 0;
-}
-
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
index d87f60b..1c78406 100644
--- a/drivers/gpu/drm/drm_scatter.c
+++ b/drivers/gpu/drm/drm_scatter.c
@@ -46,7 +46,7 @@
 #endif
 }
 
-void drm_sg_cleanup(struct drm_sg_mem * entry)
+static void drm_sg_cleanup(struct drm_sg_mem * entry)
 {
 	struct page *page;
 	int i;
@@ -64,19 +64,32 @@
 	kfree(entry);
 }
 
+void drm_legacy_sg_cleanup(struct drm_device *dev)
+{
+	if (drm_core_check_feature(dev, DRIVER_SG) && dev->sg &&
+	    !drm_core_check_feature(dev, DRIVER_MODESET)) {
+		drm_sg_cleanup(dev->sg);
+		dev->sg = NULL;
+	}
+}
 #ifdef _LP64
 # define ScatterHandle(x) (unsigned int)((x >> 32) + (x & ((1L << 32) - 1)))
 #else
 # define ScatterHandle(x) (unsigned int)(x)
 #endif
 
-int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request)
+int drm_sg_alloc(struct drm_device *dev, void *data,
+		 struct drm_file *file_priv)
 {
+	struct drm_scatter_gather *request = data;
 	struct drm_sg_mem *entry;
 	unsigned long pages, i, j;
 
 	DRM_DEBUG("\n");
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!drm_core_check_feature(dev, DRIVER_SG))
 		return -EINVAL;
 
@@ -181,21 +194,15 @@
 	return -ENOMEM;
 }
 
-int drm_sg_alloc_ioctl(struct drm_device *dev, void *data,
-		       struct drm_file *file_priv)
-{
-	struct drm_scatter_gather *request = data;
-
-	return drm_sg_alloc(dev, request);
-
-}
-
 int drm_sg_free(struct drm_device *dev, void *data,
 		struct drm_file *file_priv)
 {
 	struct drm_scatter_gather *request = data;
 	struct drm_sg_mem *entry;
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
 	if (!drm_core_check_feature(dev, DRIVER_SG))
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 327ca19..e7eb027 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -40,6 +40,9 @@
 unsigned int drm_debug = 0;	/* 1 to enable debug output */
 EXPORT_SYMBOL(drm_debug);
 
+unsigned int drm_rnodes = 0;	/* 1 to enable experimental render nodes API */
+EXPORT_SYMBOL(drm_rnodes);
+
 unsigned int drm_vblank_offdelay = 5000;    /* Default to 5000 msecs. */
 EXPORT_SYMBOL(drm_vblank_offdelay);
 
@@ -56,11 +59,13 @@
 MODULE_DESCRIPTION(CORE_DESC);
 MODULE_LICENSE("GPL and additional rights");
 MODULE_PARM_DESC(debug, "Enable debug output");
+MODULE_PARM_DESC(rnodes, "Enable experimental render nodes API");
 MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs]");
 MODULE_PARM_DESC(timestamp_precision_usec, "Max. error on timestamps [usecs]");
 MODULE_PARM_DESC(timestamp_monotonic, "Use monotonic timestamps");
 
 module_param_named(debug, drm_debug, int, 0600);
+module_param_named(rnodes, drm_rnodes, int, 0600);
 module_param_named(vblankoffdelay, drm_vblank_offdelay, int, 0600);
 module_param_named(timestamp_precision_usec, drm_timestamp_precision, int, 0600);
 module_param_named(timestamp_monotonic, drm_timestamp_monotonic, int, 0600);
@@ -68,7 +73,6 @@
 struct idr drm_minors_idr;
 
 struct class *drm_class;
-struct proc_dir_entry *drm_proc_root;
 struct dentry *drm_debugfs_root;
 
 int drm_err(const char *func, const char *format, ...)
@@ -113,12 +117,12 @@
 	int base = 0, limit = 63;
 
 	if (type == DRM_MINOR_CONTROL) {
-                base += 64;
-                limit = base + 127;
-        } else if (type == DRM_MINOR_RENDER) {
-                base += 128;
-                limit = base + 255;
-        }
+		base += 64;
+		limit = base + 63;
+	} else if (type == DRM_MINOR_RENDER) {
+		base += 128;
+		limit = base + 63;
+	}
 
 	mutex_lock(&dev->struct_mutex);
 	ret = idr_alloc(&drm_minors_idr, NULL, base, limit, GFP_KERNEL);
@@ -288,13 +292,7 @@
 			goto error_out_unreg;
 	}
 
-
-
-	retcode = drm_ctxbitmap_init(dev);
-	if (retcode) {
-		DRM_ERROR("Cannot allocate memory for context bitmap.\n");
-		goto error_out_unreg;
-	}
+	drm_legacy_ctxbitmap_init(dev);
 
 	if (driver->driver_features & DRIVER_GEM) {
 		retcode = drm_gem_init(dev);
@@ -321,9 +319,8 @@
  * \param sec-minor structure to hold the assigned minor
  * \return negative number on failure.
  *
- * Search an empty entry and initialize it to the given parameters, and
- * create the proc init entry via proc_init(). This routines assigns
- * minor numbers to secondary heads of multi-headed cards
+ * Search an empty entry and initialize it to the given parameters. This
+ * routines assigns minor numbers to secondary heads of multi-headed cards
  */
 int drm_get_minor(struct drm_device *dev, struct drm_minor **minor, int type)
 {
@@ -351,20 +348,11 @@
 
 	idr_replace(&drm_minors_idr, new_minor, minor_id);
 
-	if (type == DRM_MINOR_LEGACY) {
-		ret = drm_proc_init(new_minor, drm_proc_root);
-		if (ret) {
-			DRM_ERROR("DRM: Failed to initialize /proc/dri.\n");
-			goto err_mem;
-		}
-	} else
-		new_minor->proc_root = NULL;
-
 #if defined(CONFIG_DEBUG_FS)
 	ret = drm_debugfs_init(new_minor, minor_id, drm_debugfs_root);
 	if (ret) {
 		DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n");
-		goto err_g2;
+		goto err_mem;
 	}
 #endif
 
@@ -372,7 +360,7 @@
 	if (ret) {
 		printk(KERN_ERR
 		       "DRM: Error sysfs_device_add.\n");
-		goto err_g2;
+		goto err_debugfs;
 	}
 	*minor = new_minor;
 
@@ -380,10 +368,11 @@
 	return 0;
 
 
-err_g2:
-	if (new_minor->type == DRM_MINOR_LEGACY)
-		drm_proc_cleanup(new_minor, drm_proc_root);
+err_debugfs:
+#if defined(CONFIG_DEBUG_FS)
+	drm_debugfs_cleanup(new_minor);
 err_mem:
+#endif
 	kfree(new_minor);
 err_idr:
 	idr_remove(&drm_minors_idr, minor_id);
@@ -397,10 +386,6 @@
  *
  * \param sec_minor - structure to be released
  * \return always zero
- *
- * Cleans up the proc resources. Not legal for this to be the
- * last minor released.
- *
  */
 int drm_put_minor(struct drm_minor **minor_p)
 {
@@ -408,8 +393,6 @@
 
 	DRM_DEBUG("release secondary minor %d\n", minor->index);
 
-	if (minor->type == DRM_MINOR_LEGACY)
-		drm_proc_cleanup(minor, drm_proc_root);
 #if defined(CONFIG_DEBUG_FS)
 	drm_debugfs_cleanup(minor);
 #endif
@@ -451,16 +434,11 @@
 
 	drm_lastclose(dev);
 
-	if (drm_core_has_MTRR(dev) && drm_core_has_AGP(dev) && dev->agp)
-		arch_phys_wc_del(dev->agp->agp_mtrr);
-
 	if (dev->driver->unload)
 		dev->driver->unload(dev);
 
-	if (drm_core_has_AGP(dev) && dev->agp) {
-		kfree(dev->agp);
-		dev->agp = NULL;
-	}
+	if (dev->driver->bus->agp_destroy)
+		dev->driver->bus->agp_destroy(dev);
 
 	drm_vblank_cleanup(dev);
 
@@ -468,11 +446,14 @@
 		drm_rmmap(dev, r_list->map);
 	drm_ht_remove(&dev->map_hash);
 
-	drm_ctxbitmap_cleanup(dev);
+	drm_legacy_ctxbitmap_cleanup(dev);
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_put_minor(&dev->control);
 
+	if (dev->render)
+		drm_put_minor(&dev->render);
+
 	if (driver->driver_features & DRIVER_GEM)
 		drm_gem_destroy(dev);
 
@@ -489,6 +470,8 @@
 	/* for a USB device */
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_unplug_minor(dev->control);
+	if (dev->render)
+		drm_unplug_minor(dev->render);
 	drm_unplug_minor(dev->primary);
 
 	mutex_lock(&drm_global_mutex);
diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c
index 34a156f..8766472 100644
--- a/drivers/gpu/drm/drm_usb.c
+++ b/drivers/gpu/drm/drm_usb.c
@@ -33,6 +33,12 @@
 	if (ret)
 		goto err_g1;
 
+	if (drm_core_check_feature(dev, DRIVER_RENDER) && drm_rnodes) {
+		ret = drm_get_minor(dev, &dev->render, DRM_MINOR_RENDER);
+		if (ret)
+			goto err_g11;
+	}
+
 	ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY);
 	if (ret)
 		goto err_g2;
@@ -62,6 +68,9 @@
 err_g3:
 	drm_put_minor(&dev->primary);
 err_g2:
+	if (dev->render)
+		drm_put_minor(&dev->render);
+err_g11:
 	drm_put_minor(&dev->control);
 err_g1:
 	kfree(dev);
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index feb2003..b5c5af7 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -251,8 +251,7 @@
 			switch (map->type) {
 			case _DRM_REGISTERS:
 			case _DRM_FRAME_BUFFER:
-				if (drm_core_has_MTRR(dev))
-					arch_phys_wc_del(map->mtrr);
+				arch_phys_wc_del(map->mtrr);
 				iounmap(map->handle);
 				break;
 			case _DRM_SHM:
diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c
new file mode 100644
index 0000000..63b4712
--- /dev/null
+++ b/drivers/gpu/drm/drm_vma_manager.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2006-2009 VMware, Inc., Palo Alto, CA., USA
+ * Copyright (c) 2012 David Airlie <airlied@linux.ie>
+ * Copyright (c) 2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_mm.h>
+#include <drm/drm_vma_manager.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/**
+ * DOC: vma offset manager
+ *
+ * The vma-manager is responsible to map arbitrary driver-dependent memory
+ * regions into the linear user address-space. It provides offsets to the
+ * caller which can then be used on the address_space of the drm-device. It
+ * takes care to not overlap regions, size them appropriately and to not
+ * confuse mm-core by inconsistent fake vm_pgoff fields.
+ * Drivers shouldn't use this for object placement in VMEM. This manager should
+ * only be used to manage mappings into linear user-space VMs.
+ *
+ * We use drm_mm as backend to manage object allocations. But it is highly
+ * optimized for alloc/free calls, not lookups. Hence, we use an rb-tree to
+ * speed up offset lookups.
+ *
+ * You must not use multiple offset managers on a single address_space.
+ * Otherwise, mm-core will be unable to tear down memory mappings as the VM will
+ * no longer be linear. Please use VM_NONLINEAR in that case and implement your
+ * own offset managers.
+ *
+ * This offset manager works on page-based addresses. That is, every argument
+ * and return code (with the exception of drm_vma_node_offset_addr()) is given
+ * in number of pages, not number of bytes. That means, object sizes and offsets
+ * must always be page-aligned (as usual).
+ * If you want to get a valid byte-based user-space address for a given offset,
+ * please see drm_vma_node_offset_addr().
+ *
+ * Additionally to offset management, the vma offset manager also handles access
+ * management. For every open-file context that is allowed to access a given
+ * node, you must call drm_vma_node_allow(). Otherwise, an mmap() call on this
+ * open-file with the offset of the node will fail with -EACCES. To revoke
+ * access again, use drm_vma_node_revoke(). However, the caller is responsible
+ * for destroying already existing mappings, if required.
+ */
+
+/**
+ * drm_vma_offset_manager_init - Initialize new offset-manager
+ * @mgr: Manager object
+ * @page_offset: Offset of available memory area (page-based)
+ * @size: Size of available address space range (page-based)
+ *
+ * Initialize a new offset-manager. The offset and area size available for the
+ * manager are given as @page_offset and @size. Both are interpreted as
+ * page-numbers, not bytes.
+ *
+ * Adding/removing nodes from the manager is locked internally and protected
+ * against concurrent access. However, node allocation and destruction is left
+ * for the caller. While calling into the vma-manager, a given node must
+ * always be guaranteed to be referenced.
+ */
+void drm_vma_offset_manager_init(struct drm_vma_offset_manager *mgr,
+				 unsigned long page_offset, unsigned long size)
+{
+	rwlock_init(&mgr->vm_lock);
+	mgr->vm_addr_space_rb = RB_ROOT;
+	drm_mm_init(&mgr->vm_addr_space_mm, page_offset, size);
+}
+EXPORT_SYMBOL(drm_vma_offset_manager_init);
+
+/**
+ * drm_vma_offset_manager_destroy() - Destroy offset manager
+ * @mgr: Manager object
+ *
+ * Destroy an object manager which was previously created via
+ * drm_vma_offset_manager_init(). The caller must remove all allocated nodes
+ * before destroying the manager. Otherwise, drm_mm will refuse to free the
+ * requested resources.
+ *
+ * The manager must not be accessed after this function is called.
+ */
+void drm_vma_offset_manager_destroy(struct drm_vma_offset_manager *mgr)
+{
+	/* take the lock to protect against buggy drivers */
+	write_lock(&mgr->vm_lock);
+	drm_mm_takedown(&mgr->vm_addr_space_mm);
+	write_unlock(&mgr->vm_lock);
+}
+EXPORT_SYMBOL(drm_vma_offset_manager_destroy);
+
+/**
+ * drm_vma_offset_lookup() - Find node in offset space
+ * @mgr: Manager object
+ * @start: Start address for object (page-based)
+ * @pages: Size of object (page-based)
+ *
+ * Find a node given a start address and object size. This returns the _best_
+ * match for the given node. That is, @start may point somewhere into a valid
+ * region and the given node will be returned, as long as the node spans the
+ * whole requested area (given the size in number of pages as @pages).
+ *
+ * RETURNS:
+ * Returns NULL if no suitable node can be found. Otherwise, the best match
+ * is returned. It's the caller's responsibility to make sure the node doesn't
+ * get destroyed before the caller can access it.
+ */
+struct drm_vma_offset_node *drm_vma_offset_lookup(struct drm_vma_offset_manager *mgr,
+						  unsigned long start,
+						  unsigned long pages)
+{
+	struct drm_vma_offset_node *node;
+
+	read_lock(&mgr->vm_lock);
+	node = drm_vma_offset_lookup_locked(mgr, start, pages);
+	read_unlock(&mgr->vm_lock);
+
+	return node;
+}
+EXPORT_SYMBOL(drm_vma_offset_lookup);
+
+/**
+ * drm_vma_offset_lookup_locked() - Find node in offset space
+ * @mgr: Manager object
+ * @start: Start address for object (page-based)
+ * @pages: Size of object (page-based)
+ *
+ * Same as drm_vma_offset_lookup() but requires the caller to lock offset lookup
+ * manually. See drm_vma_offset_lock_lookup() for an example.
+ *
+ * RETURNS:
+ * Returns NULL if no suitable node can be found. Otherwise, the best match
+ * is returned.
+ */
+struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_manager *mgr,
+							 unsigned long start,
+							 unsigned long pages)
+{
+	struct drm_vma_offset_node *node, *best;
+	struct rb_node *iter;
+	unsigned long offset;
+
+	iter = mgr->vm_addr_space_rb.rb_node;
+	best = NULL;
+
+	while (likely(iter)) {
+		node = rb_entry(iter, struct drm_vma_offset_node, vm_rb);
+		offset = node->vm_node.start;
+		if (start >= offset) {
+			iter = iter->rb_right;
+			best = node;
+			if (start == offset)
+				break;
+		} else {
+			iter = iter->rb_left;
+		}
+	}
+
+	/* verify that the node spans the requested area */
+	if (best) {
+		offset = best->vm_node.start + best->vm_node.size;
+		if (offset < start + pages)
+			best = NULL;
+	}
+
+	return best;
+}
+EXPORT_SYMBOL(drm_vma_offset_lookup_locked);
+
+/* internal helper to link @node into the rb-tree */
+static void _drm_vma_offset_add_rb(struct drm_vma_offset_manager *mgr,
+				   struct drm_vma_offset_node *node)
+{
+	struct rb_node **iter = &mgr->vm_addr_space_rb.rb_node;
+	struct rb_node *parent = NULL;
+	struct drm_vma_offset_node *iter_node;
+
+	while (likely(*iter)) {
+		parent = *iter;
+		iter_node = rb_entry(*iter, struct drm_vma_offset_node, vm_rb);
+
+		if (node->vm_node.start < iter_node->vm_node.start)
+			iter = &(*iter)->rb_left;
+		else if (node->vm_node.start > iter_node->vm_node.start)
+			iter = &(*iter)->rb_right;
+		else
+			BUG();
+	}
+
+	rb_link_node(&node->vm_rb, parent, iter);
+	rb_insert_color(&node->vm_rb, &mgr->vm_addr_space_rb);
+}
+
+/**
+ * drm_vma_offset_add() - Add offset node to manager
+ * @mgr: Manager object
+ * @node: Node to be added
+ * @pages: Allocation size visible to user-space (in number of pages)
+ *
+ * Add a node to the offset-manager. If the node was already added, this does
+ * nothing and return 0. @pages is the size of the object given in number of
+ * pages.
+ * After this call succeeds, you can access the offset of the node until it
+ * is removed again.
+ *
+ * If this call fails, it is safe to retry the operation or call
+ * drm_vma_offset_remove(), anyway. However, no cleanup is required in that
+ * case.
+ *
+ * @pages is not required to be the same size as the underlying memory object
+ * that you want to map. It only limits the size that user-space can map into
+ * their address space.
+ *
+ * RETURNS:
+ * 0 on success, negative error code on failure.
+ */
+int drm_vma_offset_add(struct drm_vma_offset_manager *mgr,
+		       struct drm_vma_offset_node *node, unsigned long pages)
+{
+	int ret;
+
+	write_lock(&mgr->vm_lock);
+
+	if (drm_mm_node_allocated(&node->vm_node)) {
+		ret = 0;
+		goto out_unlock;
+	}
+
+	ret = drm_mm_insert_node(&mgr->vm_addr_space_mm, &node->vm_node,
+				 pages, 0, DRM_MM_SEARCH_DEFAULT);
+	if (ret)
+		goto out_unlock;
+
+	_drm_vma_offset_add_rb(mgr, node);
+
+out_unlock:
+	write_unlock(&mgr->vm_lock);
+	return ret;
+}
+EXPORT_SYMBOL(drm_vma_offset_add);
+
+/**
+ * drm_vma_offset_remove() - Remove offset node from manager
+ * @mgr: Manager object
+ * @node: Node to be removed
+ *
+ * Remove a node from the offset manager. If the node wasn't added before, this
+ * does nothing. After this call returns, the offset and size will be 0 until a
+ * new offset is allocated via drm_vma_offset_add() again. Helper functions like
+ * drm_vma_node_start() and drm_vma_node_offset_addr() will return 0 if no
+ * offset is allocated.
+ */
+void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr,
+			   struct drm_vma_offset_node *node)
+{
+	write_lock(&mgr->vm_lock);
+
+	if (drm_mm_node_allocated(&node->vm_node)) {
+		rb_erase(&node->vm_rb, &mgr->vm_addr_space_rb);
+		drm_mm_remove_node(&node->vm_node);
+		memset(&node->vm_node, 0, sizeof(node->vm_node));
+	}
+
+	write_unlock(&mgr->vm_lock);
+}
+EXPORT_SYMBOL(drm_vma_offset_remove);
+
+/**
+ * drm_vma_node_allow - Add open-file to list of allowed users
+ * @node: Node to modify
+ * @filp: Open file to add
+ *
+ * Add @filp to the list of allowed open-files for this node. If @filp is
+ * already on this list, the ref-count is incremented.
+ *
+ * The list of allowed-users is preserved across drm_vma_offset_add() and
+ * drm_vma_offset_remove() calls. You may even call it if the node is currently
+ * not added to any offset-manager.
+ *
+ * You must remove all open-files the same number of times as you added them
+ * before destroying the node. Otherwise, you will leak memory.
+ *
+ * This is locked against concurrent access internally.
+ *
+ * RETURNS:
+ * 0 on success, negative error code on internal failure (out-of-mem)
+ */
+int drm_vma_node_allow(struct drm_vma_offset_node *node, struct file *filp)
+{
+	struct rb_node **iter;
+	struct rb_node *parent = NULL;
+	struct drm_vma_offset_file *new, *entry;
+	int ret = 0;
+
+	/* Preallocate entry to avoid atomic allocations below. It is quite
+	 * unlikely that an open-file is added twice to a single node so we
+	 * don't optimize for this case. OOM is checked below only if the entry
+	 * is actually used. */
+	new = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+	write_lock(&node->vm_lock);
+
+	iter = &node->vm_files.rb_node;
+
+	while (likely(*iter)) {
+		parent = *iter;
+		entry = rb_entry(*iter, struct drm_vma_offset_file, vm_rb);
+
+		if (filp == entry->vm_filp) {
+			entry->vm_count++;
+			goto unlock;
+		} else if (filp > entry->vm_filp) {
+			iter = &(*iter)->rb_right;
+		} else {
+			iter = &(*iter)->rb_left;
+		}
+	}
+
+	if (!new) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	new->vm_filp = filp;
+	new->vm_count = 1;
+	rb_link_node(&new->vm_rb, parent, iter);
+	rb_insert_color(&new->vm_rb, &node->vm_files);
+	new = NULL;
+
+unlock:
+	write_unlock(&node->vm_lock);
+	kfree(new);
+	return ret;
+}
+EXPORT_SYMBOL(drm_vma_node_allow);
+
+/**
+ * drm_vma_node_revoke - Remove open-file from list of allowed users
+ * @node: Node to modify
+ * @filp: Open file to remove
+ *
+ * Decrement the ref-count of @filp in the list of allowed open-files on @node.
+ * If the ref-count drops to zero, remove @filp from the list. You must call
+ * this once for every drm_vma_node_allow() on @filp.
+ *
+ * This is locked against concurrent access internally.
+ *
+ * If @filp is not on the list, nothing is done.
+ */
+void drm_vma_node_revoke(struct drm_vma_offset_node *node, struct file *filp)
+{
+	struct drm_vma_offset_file *entry;
+	struct rb_node *iter;
+
+	write_lock(&node->vm_lock);
+
+	iter = node->vm_files.rb_node;
+	while (likely(iter)) {
+		entry = rb_entry(iter, struct drm_vma_offset_file, vm_rb);
+		if (filp == entry->vm_filp) {
+			if (!--entry->vm_count) {
+				rb_erase(&entry->vm_rb, &node->vm_files);
+				kfree(entry);
+			}
+			break;
+		} else if (filp > entry->vm_filp) {
+			iter = iter->rb_right;
+		} else {
+			iter = iter->rb_left;
+		}
+	}
+
+	write_unlock(&node->vm_lock);
+}
+EXPORT_SYMBOL(drm_vma_node_revoke);
+
+/**
+ * drm_vma_node_is_allowed - Check whether an open-file is granted access
+ * @node: Node to check
+ * @filp: Open-file to check for
+ *
+ * Search the list in @node whether @filp is currently on the list of allowed
+ * open-files (see drm_vma_node_allow()).
+ *
+ * This is locked against concurrent access internally.
+ *
+ * RETURNS:
+ * true iff @filp is on the list
+ */
+bool drm_vma_node_is_allowed(struct drm_vma_offset_node *node,
+			     struct file *filp)
+{
+	struct drm_vma_offset_file *entry;
+	struct rb_node *iter;
+
+	read_lock(&node->vm_lock);
+
+	iter = node->vm_files.rb_node;
+	while (likely(iter)) {
+		entry = rb_entry(iter, struct drm_vma_offset_file, vm_rb);
+		if (filp == entry->vm_filp)
+			break;
+		else if (filp > entry->vm_filp)
+			iter = iter->rb_right;
+		else
+			iter = iter->rb_left;
+	}
+
+	read_unlock(&node->vm_lock);
+
+	return iter;
+}
+EXPORT_SYMBOL(drm_vma_node_is_allowed);
diff --git a/drivers/gpu/drm/exynos/exynos_ddc.c b/drivers/gpu/drm/exynos/exynos_ddc.c
index 95c75ed..30ef41b 100644
--- a/drivers/gpu/drm/exynos/exynos_ddc.c
+++ b/drivers/gpu/drm/exynos/exynos_ddc.c
@@ -15,7 +15,6 @@
 
 #include <linux/kernel.h>
 #include <linux/i2c.h>
-#include <linux/module.h>
 
 
 #include "exynos_drm_drv.h"
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
index 9a35d17..14f5c1d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_crtc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -184,8 +184,9 @@
 };
 
 static int exynos_drm_crtc_page_flip(struct drm_crtc *crtc,
-				      struct drm_framebuffer *fb,
-				      struct drm_pending_vblank_event *event)
+				     struct drm_framebuffer *fb,
+				     struct drm_pending_vblank_event *event,
+				     uint32_t page_flip_flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct exynos_drm_private *dev_priv = dev->dev_private;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
index a0f997e..fd76449 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dmabuf.c
@@ -22,6 +22,11 @@
 	bool is_mapped;
 };
 
+static struct exynos_drm_gem_obj *dma_buf_to_obj(struct dma_buf *buf)
+{
+	return to_exynos_gem_obj(buf->priv);
+}
+
 static int exynos_gem_attach_dma_buf(struct dma_buf *dmabuf,
 					struct device *dev,
 					struct dma_buf_attachment *attach)
@@ -63,7 +68,7 @@
 					enum dma_data_direction dir)
 {
 	struct exynos_drm_dmabuf_attachment *exynos_attach = attach->priv;
-	struct exynos_drm_gem_obj *gem_obj = attach->dmabuf->priv;
+	struct exynos_drm_gem_obj *gem_obj = dma_buf_to_obj(attach->dmabuf);
 	struct drm_device *dev = gem_obj->base.dev;
 	struct exynos_drm_gem_buf *buf;
 	struct scatterlist *rd, *wr;
@@ -127,27 +132,6 @@
 	/* Nothing to do. */
 }
 
-static void exynos_dmabuf_release(struct dma_buf *dmabuf)
-{
-	struct exynos_drm_gem_obj *exynos_gem_obj = dmabuf->priv;
-
-	/*
-	 * exynos_dmabuf_release() call means that file object's
-	 * f_count is 0 and it calls drm_gem_object_handle_unreference()
-	 * to drop the references that these values had been increased
-	 * at drm_prime_handle_to_fd()
-	 */
-	if (exynos_gem_obj->base.export_dma_buf == dmabuf) {
-		exynos_gem_obj->base.export_dma_buf = NULL;
-
-		/*
-		 * drop this gem object refcount to release allocated buffer
-		 * and resources.
-		 */
-		drm_gem_object_unreference_unlocked(&exynos_gem_obj->base);
-	}
-}
-
 static void *exynos_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf,
 						unsigned long page_num)
 {
@@ -193,7 +177,7 @@
 	.kunmap			= exynos_gem_dmabuf_kunmap,
 	.kunmap_atomic		= exynos_gem_dmabuf_kunmap_atomic,
 	.mmap			= exynos_gem_dmabuf_mmap,
-	.release		= exynos_dmabuf_release,
+	.release		= drm_gem_dmabuf_release,
 };
 
 struct dma_buf *exynos_dmabuf_prime_export(struct drm_device *drm_dev,
@@ -201,7 +185,7 @@
 {
 	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
 
-	return dma_buf_export(exynos_gem_obj, &exynos_dmabuf_ops,
+	return dma_buf_export(obj, &exynos_dmabuf_ops,
 				exynos_gem_obj->base.size, flags);
 }
 
@@ -219,8 +203,7 @@
 	if (dma_buf->ops == &exynos_dmabuf_ops) {
 		struct drm_gem_object *obj;
 
-		exynos_gem_obj = dma_buf->priv;
-		obj = &exynos_gem_obj->base;
+		obj = dma_buf->priv;
 
 		/* is it from our device? */
 		if (obj->dev == drm_dev) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index ca2729a..df81d3c 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -213,7 +213,7 @@
 	.close = drm_gem_vm_close,
 };
 
-static struct drm_ioctl_desc exynos_ioctls[] = {
+static const struct drm_ioctl_desc exynos_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_CREATE, exynos_drm_gem_create_ioctl,
 			DRM_UNLOCKED | DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MAP_OFFSET,
@@ -271,12 +271,13 @@
 	.gem_vm_ops		= &exynos_drm_gem_vm_ops,
 	.dumb_create		= exynos_drm_gem_dumb_create,
 	.dumb_map_offset	= exynos_drm_gem_dumb_map_offset,
-	.dumb_destroy		= exynos_drm_gem_dumb_destroy,
+	.dumb_destroy		= drm_gem_dumb_destroy,
 	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
 	.gem_prime_export	= exynos_dmabuf_prime_export,
 	.gem_prime_import	= exynos_dmabuf_prime_import,
 	.ioctls			= exynos_ioctls,
+	.num_ioctls		= ARRAY_SIZE(exynos_ioctls),
 	.fops			= &exynos_drm_driver_fops,
 	.name	= DRIVER_NAME,
 	.desc	= DRIVER_DESC,
@@ -288,7 +289,6 @@
 static int exynos_drm_platform_probe(struct platform_device *pdev)
 {
 	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
-	exynos_drm_driver.num_ioctls = DRM_ARRAY_SIZE(exynos_ioctls);
 
 	return drm_platform_init(&exynos_drm_driver, pdev);
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 61b094f..6e047bd 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -12,7 +12,6 @@
  *
  */
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 3e106be..1c263da 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -14,7 +14,6 @@
 #include <drm/drmP.h>
 
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/of_device.h>
@@ -130,7 +129,6 @@
 	  .data = &exynos5_fimd_driver_data },
 	{},
 };
-MODULE_DEVICE_TABLE(of, fimd_driver_dt_match);
 #endif
 
 static inline struct fimd_driver_data *drm_fimd_get_driver_data(
@@ -1082,7 +1080,6 @@
 	},
 	{},
 };
-MODULE_DEVICE_TABLE(platform, fimd_driver_ids);
 
 static const struct dev_pm_ops fimd_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(fimd_suspend, fimd_resume)
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 42a5a546..eddea49 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -8,7 +8,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
@@ -806,9 +805,20 @@
 	struct g2d_cmdlist_node *node =
 				list_first_entry(&runqueue_node->run_cmdlist,
 						struct g2d_cmdlist_node, list);
+	int ret;
 
-	pm_runtime_get_sync(g2d->dev);
-	clk_enable(g2d->gate_clk);
+	ret = pm_runtime_get_sync(g2d->dev);
+	if (ret < 0) {
+		dev_warn(g2d->dev, "failed pm power on.\n");
+		return;
+	}
+
+	ret = clk_prepare_enable(g2d->gate_clk);
+	if (ret < 0) {
+		dev_warn(g2d->dev, "failed to enable clock.\n");
+		pm_runtime_put_sync(g2d->dev);
+		return;
+	}
 
 	writel_relaxed(node->dma_addr, g2d->regs + G2D_DMA_SFR_BASE_ADDR);
 	writel_relaxed(G2D_DMA_START, g2d->regs + G2D_DMA_COMMAND);
@@ -861,7 +871,7 @@
 					    runqueue_work);
 
 	mutex_lock(&g2d->runqueue_mutex);
-	clk_disable(g2d->gate_clk);
+	clk_disable_unprepare(g2d->gate_clk);
 	pm_runtime_put_sync(g2d->dev);
 
 	complete(&g2d->runqueue_node->complete);
@@ -1521,7 +1531,6 @@
 	{ .compatible = "samsung,exynos5250-g2d" },
 	{},
 };
-MODULE_DEVICE_TABLE(of, exynos_g2d_match);
 #endif
 
 struct platform_driver g2d_driver = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 24c22a8..f3c6f40 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -10,6 +10,7 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_vma_manager.h>
 
 #include <linux/shmem_fs.h>
 #include <drm/exynos_drm.h>
@@ -135,7 +136,7 @@
 	obj = &exynos_gem_obj->base;
 	buf = exynos_gem_obj->buffer;
 
-	DRM_DEBUG_KMS("handle count = %d\n", atomic_read(&obj->handle_count));
+	DRM_DEBUG_KMS("handle count = %d\n", obj->handle_count);
 
 	/*
 	 * do not release memory region from exporter.
@@ -152,8 +153,7 @@
 	exynos_drm_fini_buf(obj->dev, buf);
 	exynos_gem_obj->buffer = NULL;
 
-	if (obj->map_list.map)
-		drm_gem_free_mmap_offset(obj);
+	drm_gem_free_mmap_offset(obj);
 
 	/* release file pointer to gem object. */
 	drm_gem_object_release(obj);
@@ -703,13 +703,11 @@
 		goto unlock;
 	}
 
-	if (!obj->map_list.map) {
-		ret = drm_gem_create_mmap_offset(obj);
-		if (ret)
-			goto out;
-	}
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret)
+		goto out;
 
-	*offset = (u64)obj->map_list.hash.key << PAGE_SHIFT;
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
 	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
 
 out:
@@ -719,26 +717,6 @@
 	return ret;
 }
 
-int exynos_drm_gem_dumb_destroy(struct drm_file *file_priv,
-				struct drm_device *dev,
-				unsigned int handle)
-{
-	int ret;
-
-	/*
-	 * obj->refcount and obj->handle_count are decreased and
-	 * if both them are 0 then exynos_drm_gem_free_object()
-	 * would be called by callback to release resources.
-	 */
-	ret = drm_gem_handle_delete(file_priv, handle);
-	if (ret < 0) {
-		DRM_ERROR("failed to delete drm_gem_handle.\n");
-		return ret;
-	}
-
-	return 0;
-}
-
 int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 468766b..09555af 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -151,15 +151,6 @@
 				   struct drm_device *dev, uint32_t handle,
 				   uint64_t *offset);
 
-/*
- * destroy memory region allocated.
- *	- a gem handle and physical memory region pointed by a gem object
- *	would be released by drm_gem_handle_delete().
- */
-int exynos_drm_gem_dumb_destroy(struct drm_file *file_priv,
-				struct drm_device *dev,
-				unsigned int handle);
-
 /* page fault handler and mmap fault address(virtual) to physical memory. */
 int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 472e3b2..90b8a1a 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -12,7 +12,6 @@
  *
  */
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/gpu/drm/exynos/exynos_drm_hdmi.c b/drivers/gpu/drm/exynos/exynos_drm_hdmi.c
index aaa550d..8d3bc01 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_hdmi.c
@@ -15,7 +15,6 @@
 
 #include <linux/kernel.h>
 #include <linux/wait.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index b1ef8e7..d2b6ab4 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -12,7 +12,6 @@
  *
  */
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/types.h>
 #include <linux/clk.h>
@@ -342,10 +341,10 @@
 		 */
 		ippdrv = ipp_find_obj(&ctx->ipp_idr, &ctx->ipp_lock,
 						prop_list->ipp_id);
-		if (!ippdrv) {
+		if (IS_ERR(ippdrv)) {
 			DRM_ERROR("not found ipp%d driver.\n",
 					prop_list->ipp_id);
-			return -EINVAL;
+			return PTR_ERR(ippdrv);
 		}
 
 		prop_list = ippdrv->prop_list;
@@ -970,9 +969,9 @@
 	/* find command node */
 	c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock,
 		qbuf->prop_id);
-	if (!c_node) {
+	if (IS_ERR(c_node)) {
 		DRM_ERROR("failed to get command node.\n");
-		return -EFAULT;
+		return PTR_ERR(c_node);
 	}
 
 	/* buffer control */
@@ -1106,9 +1105,9 @@
 
 	c_node = ipp_find_obj(&ctx->prop_idr, &ctx->prop_lock,
 		cmd_ctrl->prop_id);
-	if (!c_node) {
+	if (IS_ERR(c_node)) {
 		DRM_ERROR("invalid command node list.\n");
-		return -EINVAL;
+		return PTR_ERR(c_node);
 	}
 
 	if (!exynos_drm_ipp_check_valid(ippdrv->dev, cmd_ctrl->ctrl,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 427640a..49669aa 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
index 41cc74d..c57c565 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c
@@ -13,7 +13,6 @@
 #include <drm/drmP.h>
 
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 
 #include <drm/exynos_drm.h>
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 62ef597..2f5c694 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -24,7 +24,6 @@
 #include <linux/spinlock.h>
 #include <linux/wait.h>
 #include <linux/i2c.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/gpu/drm/exynos/exynos_hdmiphy.c b/drivers/gpu/drm/exynos/exynos_hdmiphy.c
index ef04255..6e320ae 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmiphy.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmiphy.c
@@ -15,7 +15,6 @@
 
 #include <linux/kernel.h>
 #include <linux/i2c.h>
-#include <linux/module.h>
 
 #include "exynos_drm_drv.h"
 #include "exynos_hdmi.h"
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 42ffb71..c9a137c 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -23,7 +23,6 @@
 #include <linux/spinlock.h>
 #include <linux/wait.h>
 #include <linux/i2c.h>
-#include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
diff --git a/drivers/gpu/drm/gma500/Makefile b/drivers/gpu/drm/gma500/Makefile
index 7a2d40a..e9064dd 100644
--- a/drivers/gpu/drm/gma500/Makefile
+++ b/drivers/gpu/drm/gma500/Makefile
@@ -15,6 +15,7 @@
 	  mmu.o \
 	  power.o \
 	  psb_drv.o \
+	  gma_display.o \
 	  psb_intel_display.o \
 	  psb_intel_lvds.o \
 	  psb_intel_modes.o \
diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index 23e14e9..162f686 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -641,6 +641,7 @@
 
 	.crtc_helper = &cdv_intel_helper_funcs,
 	.crtc_funcs = &cdv_intel_crtc_funcs,
+	.clock_funcs = &cdv_clock_funcs,
 
 	.output_init = cdv_output_init,
 	.hotplug = cdv_hotplug_event,
@@ -655,4 +656,6 @@
 	.restore_regs = cdv_restore_display_registers,
 	.power_down = cdv_power_down,
 	.power_up = cdv_power_up,
+	.update_wm = cdv_update_wm,
+	.disable_sr = cdv_disable_sr,
 };
diff --git a/drivers/gpu/drm/gma500/cdv_device.h b/drivers/gpu/drm/gma500/cdv_device.h
index 9561e17..705c11d 100644
--- a/drivers/gpu/drm/gma500/cdv_device.h
+++ b/drivers/gpu/drm/gma500/cdv_device.h
@@ -17,6 +17,7 @@
 
 extern const struct drm_crtc_helper_funcs cdv_intel_helper_funcs;
 extern const struct drm_crtc_funcs cdv_intel_crtc_funcs;
+extern const struct gma_clock_funcs cdv_clock_funcs;
 extern void cdv_intel_crt_init(struct drm_device *dev,
 			struct psb_intel_mode_device *mode_dev);
 extern void cdv_intel_lvds_init(struct drm_device *dev,
@@ -25,12 +26,5 @@
 			int reg);
 extern struct drm_display_mode *cdv_intel_crtc_mode_get(struct drm_device *dev,
 					     struct drm_crtc *crtc);
-
-static inline void cdv_intel_wait_for_vblank(struct drm_device *dev)
-{
-	/* Wait for 20ms, i.e. one cycle at 50hz. */
-        /* FIXME: msleep ?? */
-	mdelay(20);
-}
-
-
+extern void cdv_update_wm(struct drm_device *dev, struct drm_crtc *crtc);
+extern void cdv_disable_sr(struct drm_device *dev);
diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c
index 7b8386f..661af49 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_crt.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c
@@ -95,13 +95,12 @@
 
 	struct drm_device *dev = encoder->dev;
 	struct drm_crtc *crtc = encoder->crtc;
-	struct psb_intel_crtc *psb_intel_crtc =
-					to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	int dpll_md_reg;
 	u32 adpa, dpll_md;
 	u32 adpa_reg;
 
-	if (psb_intel_crtc->pipe == 0)
+	if (gma_crtc->pipe == 0)
 		dpll_md_reg = DPLL_A_MD;
 	else
 		dpll_md_reg = DPLL_B_MD;
@@ -124,7 +123,7 @@
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
 		adpa |= ADPA_VSYNC_ACTIVE_HIGH;
 
-	if (psb_intel_crtc->pipe == 0)
+	if (gma_crtc->pipe == 0)
 		adpa |= ADPA_PIPE_A_SELECT;
 	else
 		adpa |= ADPA_PIPE_B_SELECT;
@@ -197,10 +196,9 @@
 
 static void cdv_intel_crt_destroy(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 
-	psb_intel_i2c_destroy(psb_intel_encoder->ddc_bus);
+	psb_intel_i2c_destroy(gma_encoder->ddc_bus);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
@@ -208,9 +206,9 @@
 
 static int cdv_intel_crt_get_modes(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *psb_intel_encoder =
-				psb_intel_attached_encoder(connector);
-	return psb_intel_ddc_get_modes(connector, &psb_intel_encoder->ddc_bus->adapter);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+	return psb_intel_ddc_get_modes(connector,
+				       &gma_encoder->ddc_bus->adapter);
 }
 
 static int cdv_intel_crt_set_property(struct drm_connector *connector,
@@ -227,8 +225,8 @@
 static const struct drm_encoder_helper_funcs cdv_intel_crt_helper_funcs = {
 	.dpms = cdv_intel_crt_dpms,
 	.mode_fixup = cdv_intel_crt_mode_fixup,
-	.prepare = psb_intel_encoder_prepare,
-	.commit = psb_intel_encoder_commit,
+	.prepare = gma_encoder_prepare,
+	.commit = gma_encoder_commit,
 	.mode_set = cdv_intel_crt_mode_set,
 };
 
@@ -244,7 +242,7 @@
 				cdv_intel_crt_connector_helper_funcs = {
 	.mode_valid = cdv_intel_crt_mode_valid,
 	.get_modes = cdv_intel_crt_get_modes,
-	.best_encoder = psb_intel_best_encoder,
+	.best_encoder = gma_best_encoder,
 };
 
 static void cdv_intel_crt_enc_destroy(struct drm_encoder *encoder)
@@ -260,32 +258,31 @@
 			struct psb_intel_mode_device *mode_dev)
 {
 
-	struct psb_intel_connector *psb_intel_connector;
-	struct psb_intel_encoder *psb_intel_encoder;
+	struct gma_connector *gma_connector;
+	struct gma_encoder *gma_encoder;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
 
 	u32 i2c_reg;
 
-	psb_intel_encoder = kzalloc(sizeof(struct psb_intel_encoder), GFP_KERNEL);
-	if (!psb_intel_encoder)
+	gma_encoder = kzalloc(sizeof(struct gma_encoder), GFP_KERNEL);
+	if (!gma_encoder)
 		return;
 
-	psb_intel_connector = kzalloc(sizeof(struct psb_intel_connector), GFP_KERNEL);
-	if (!psb_intel_connector)
+	gma_connector = kzalloc(sizeof(struct gma_connector), GFP_KERNEL);
+	if (!gma_connector)
 		goto failed_connector;
 
-	connector = &psb_intel_connector->base;
+	connector = &gma_connector->base;
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
 	drm_connector_init(dev, connector,
 		&cdv_intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
 
-	encoder = &psb_intel_encoder->base;
+	encoder = &gma_encoder->base;
 	drm_encoder_init(dev, encoder,
 		&cdv_intel_crt_enc_funcs, DRM_MODE_ENCODER_DAC);
 
-	psb_intel_connector_attach_encoder(psb_intel_connector,
-					   psb_intel_encoder);
+	gma_connector_attach_encoder(gma_connector, gma_encoder);
 
 	/* Set up the DDC bus. */
 	i2c_reg = GPIOA;
@@ -294,15 +291,15 @@
 	if (dev_priv->crt_ddc_bus != 0)
 		i2c_reg = dev_priv->crt_ddc_bus;
 	}*/
-	psb_intel_encoder->ddc_bus = psb_intel_i2c_create(dev,
+	gma_encoder->ddc_bus = psb_intel_i2c_create(dev,
 							  i2c_reg, "CRTDDC_A");
-	if (!psb_intel_encoder->ddc_bus) {
+	if (!gma_encoder->ddc_bus) {
 		dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
 			   "failed.\n");
 		goto failed_ddc;
 	}
 
-	psb_intel_encoder->type = INTEL_OUTPUT_ANALOG;
+	gma_encoder->type = INTEL_OUTPUT_ANALOG;
 	/*
 	psb_intel_output->clone_mask = (1 << INTEL_ANALOG_CLONE_BIT);
 	psb_intel_output->crtc_mask = (1 << 0) | (1 << 1);
@@ -318,10 +315,10 @@
 
 	return;
 failed_ddc:
-	drm_encoder_cleanup(&psb_intel_encoder->base);
-	drm_connector_cleanup(&psb_intel_connector->base);
-	kfree(psb_intel_connector);
+	drm_encoder_cleanup(&gma_encoder->base);
+	drm_connector_cleanup(&gma_connector->base);
+	kfree(gma_connector);
 failed_connector:
-	kfree(psb_intel_encoder);
+	kfree(gma_encoder);
 	return;
 }
diff --git a/drivers/gpu/drm/gma500/cdv_intel_display.c b/drivers/gpu/drm/gma500/cdv_intel_display.c
index 82430ad..8fbfa06 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_display.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_display.c
@@ -19,54 +19,20 @@
  */
 
 #include <linux/i2c.h>
-#include <linux/pm_runtime.h>
 
 #include <drm/drmP.h>
 #include "framebuffer.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "psb_intel_display.h"
+#include "gma_display.h"
 #include "power.h"
 #include "cdv_device.h"
 
+static bool cdv_intel_find_dp_pll(const struct gma_limit_t *limit,
+				  struct drm_crtc *crtc, int target,
+				  int refclk, struct gma_clock_t *best_clock);
 
-struct cdv_intel_range_t {
-	int min, max;
-};
-
-struct cdv_intel_p2_t {
-	int dot_limit;
-	int p2_slow, p2_fast;
-};
-
-struct cdv_intel_clock_t {
-	/* given values */
-	int n;
-	int m1, m2;
-	int p1, p2;
-	/* derived values */
-	int dot;
-	int vco;
-	int m;
-	int p;
-};
-
-#define INTEL_P2_NUM		      2
-
-struct cdv_intel_limit_t {
-	struct cdv_intel_range_t dot, vco, n, m, m1, m2, p, p1;
-	struct cdv_intel_p2_t p2;
-	bool (*find_pll)(const struct cdv_intel_limit_t *, struct drm_crtc *,
-			int, int, struct cdv_intel_clock_t *);
-};
-
-static bool cdv_intel_find_best_PLL(const struct cdv_intel_limit_t *limit,
-	struct drm_crtc *crtc, int target, int refclk,
-	struct cdv_intel_clock_t *best_clock);
-static bool cdv_intel_find_dp_pll(const struct cdv_intel_limit_t *limit, struct drm_crtc *crtc, int target,
-				int refclk,
-				struct cdv_intel_clock_t *best_clock);
 
 #define CDV_LIMIT_SINGLE_LVDS_96	0
 #define CDV_LIMIT_SINGLE_LVDS_100	1
@@ -75,7 +41,7 @@
 #define CDV_LIMIT_DP_27			4
 #define CDV_LIMIT_DP_100		5
 
-static const struct cdv_intel_limit_t cdv_intel_limits[] = {
+static const struct gma_limit_t cdv_intel_limits[] = {
 	{			/* CDV_SINGLE_LVDS_96MHz */
 	 .dot = {.min = 20000, .max = 115500},
 	 .vco = {.min = 1800000, .max = 3600000},
@@ -85,9 +51,8 @@
 	 .m2 = {.min = 58, .max = 158},
 	 .p = {.min = 28, .max = 140},
 	 .p1 = {.min = 2, .max = 10},
-	 .p2 = {.dot_limit = 200000,
-		.p2_slow = 14, .p2_fast = 14},
-		.find_pll = cdv_intel_find_best_PLL,
+	 .p2 = {.dot_limit = 200000, .p2_slow = 14, .p2_fast = 14},
+	 .find_pll = gma_find_best_pll,
 	 },
 	{			/* CDV_SINGLE_LVDS_100MHz */
 	 .dot = {.min = 20000, .max = 115500},
@@ -102,7 +67,7 @@
 	  * is 80-224Mhz.  Prefer single channel as much as possible.
 	  */
 	 .p2 = {.dot_limit = 200000, .p2_slow = 14, .p2_fast = 14},
-	.find_pll = cdv_intel_find_best_PLL,
+	 .find_pll = gma_find_best_pll,
 	 },
 	{			/* CDV_DAC_HDMI_27MHz */
 	 .dot = {.min = 20000, .max = 400000},
@@ -114,7 +79,7 @@
 	 .p = {.min = 5, .max = 90},
 	 .p1 = {.min = 1, .max = 9},
 	 .p2 = {.dot_limit = 225000, .p2_slow = 10, .p2_fast = 5},
-	.find_pll = cdv_intel_find_best_PLL,
+	 .find_pll = gma_find_best_pll,
 	 },
 	{			/* CDV_DAC_HDMI_96MHz */
 	 .dot = {.min = 20000, .max = 400000},
@@ -126,7 +91,7 @@
 	 .p = {.min = 5, .max = 100},
 	 .p1 = {.min = 1, .max = 10},
 	 .p2 = {.dot_limit = 225000, .p2_slow = 10, .p2_fast = 5},
-	.find_pll = cdv_intel_find_best_PLL,
+	 .find_pll = gma_find_best_pll,
 	 },
 	{			/* CDV_DP_27MHz */
 	 .dot = {.min = 160000, .max = 272000},
@@ -255,10 +220,10 @@
  */
 static int
 cdv_dpll_set_clock_cdv(struct drm_device *dev, struct drm_crtc *crtc,
-			       struct cdv_intel_clock_t *clock, bool is_lvds, u32 ddi_select)
+		       struct gma_clock_t *clock, bool is_lvds, u32 ddi_select)
 {
-	struct psb_intel_crtc *psb_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_crtc->pipe;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
 	u32 m, n_vco, p;
 	int ret = 0;
 	int dpll_reg = (pipe == 0) ? DPLL_A : DPLL_B;
@@ -405,31 +370,11 @@
 	return 0;
 }
 
-/*
- * Returns whether any encoder on the specified pipe is of the specified type
- */
-static bool cdv_intel_pipe_has_type(struct drm_crtc *crtc, int type)
+static const struct gma_limit_t *cdv_intel_limit(struct drm_crtc *crtc,
+						 int refclk)
 {
-	struct drm_device *dev = crtc->dev;
-	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct drm_connector *l_entry;
-
-	list_for_each_entry(l_entry, &mode_config->connector_list, head) {
-		if (l_entry->encoder && l_entry->encoder->crtc == crtc) {
-			struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(l_entry);
-			if (psb_intel_encoder->type == type)
-				return true;
-		}
-	}
-	return false;
-}
-
-static const struct cdv_intel_limit_t *cdv_intel_limit(struct drm_crtc *crtc,
-							int refclk)
-{
-	const struct cdv_intel_limit_t *limit;
-	if (cdv_intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
+	const struct gma_limit_t *limit;
+	if (gma_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
 		/*
 		 * Now only single-channel LVDS is supported on CDV. If it is
 		 * incorrect, please add the dual-channel LVDS.
@@ -438,8 +383,8 @@
 			limit = &cdv_intel_limits[CDV_LIMIT_SINGLE_LVDS_96];
 		else
 			limit = &cdv_intel_limits[CDV_LIMIT_SINGLE_LVDS_100];
-	} else if (psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) ||
-			psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) {
+	} else if (gma_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT) ||
+			gma_pipe_has_type(crtc, INTEL_OUTPUT_EDP)) {
 		if (refclk == 27000)
 			limit = &cdv_intel_limits[CDV_LIMIT_DP_27];
 		else
@@ -454,8 +399,7 @@
 }
 
 /* m1 is reserved as 0 in CDV, n is a ring counter */
-static void cdv_intel_clock(struct drm_device *dev,
-			int refclk, struct cdv_intel_clock_t *clock)
+static void cdv_intel_clock(int refclk, struct gma_clock_t *clock)
 {
 	clock->m = clock->m2 + 2;
 	clock->p = clock->p1 * clock->p2;
@@ -463,93 +407,12 @@
 	clock->dot = clock->vco / clock->p;
 }
 
-
-#define INTELPllInvalid(s)   { /* ErrorF (s) */; return false; }
-static bool cdv_intel_PLL_is_valid(struct drm_crtc *crtc,
-				const struct cdv_intel_limit_t *limit,
-			       struct cdv_intel_clock_t *clock)
+static bool cdv_intel_find_dp_pll(const struct gma_limit_t *limit,
+				  struct drm_crtc *crtc, int target,
+				  int refclk,
+				  struct gma_clock_t *best_clock)
 {
-	if (clock->p1 < limit->p1.min || limit->p1.max < clock->p1)
-		INTELPllInvalid("p1 out of range\n");
-	if (clock->p < limit->p.min || limit->p.max < clock->p)
-		INTELPllInvalid("p out of range\n");
-	/* unnecessary to check the range of m(m1/M2)/n again */
-	if (clock->vco < limit->vco.min || limit->vco.max < clock->vco)
-		INTELPllInvalid("vco out of range\n");
-	/* XXX: We may need to be checking "Dot clock"
-	 * depending on the multiplier, connector, etc.,
-	 * rather than just a single range.
-	 */
-	if (clock->dot < limit->dot.min || limit->dot.max < clock->dot)
-		INTELPllInvalid("dot out of range\n");
-
-	return true;
-}
-
-static bool cdv_intel_find_best_PLL(const struct cdv_intel_limit_t *limit,
-	struct drm_crtc *crtc, int target, int refclk,
-	struct cdv_intel_clock_t *best_clock)
-{
-	struct drm_device *dev = crtc->dev;
-	struct cdv_intel_clock_t clock;
-	int err = target;
-
-
-	if (cdv_intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) &&
-	    (REG_READ(LVDS) & LVDS_PORT_EN) != 0) {
-		/*
-		 * For LVDS, if the panel is on, just rely on its current
-		 * settings for dual-channel.  We haven't figured out how to
-		 * reliably set up different single/dual channel state, if we
-		 * even can.
-		 */
-		if ((REG_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
-		    LVDS_CLKB_POWER_UP)
-			clock.p2 = limit->p2.p2_fast;
-		else
-			clock.p2 = limit->p2.p2_slow;
-	} else {
-		if (target < limit->p2.dot_limit)
-			clock.p2 = limit->p2.p2_slow;
-		else
-			clock.p2 = limit->p2.p2_fast;
-	}
-
-	memset(best_clock, 0, sizeof(*best_clock));
-	clock.m1 = 0;
-	/* m1 is reserved as 0 in CDV, n is a ring counter.
-	   So skip the m1 loop */
-	for (clock.n = limit->n.min; clock.n <= limit->n.max; clock.n++) {
-		for (clock.m2 = limit->m2.min; clock.m2 <= limit->m2.max;
-					     clock.m2++) {
-			for (clock.p1 = limit->p1.min;
-					clock.p1 <= limit->p1.max;
-					clock.p1++) {
-				int this_err;
-
-				cdv_intel_clock(dev, refclk, &clock);
-
-				if (!cdv_intel_PLL_is_valid(crtc,
-								limit, &clock))
-						continue;
-
-				this_err = abs(clock.dot - target);
-				if (this_err < err) {
-					*best_clock = clock;
-					err = this_err;
-				}
-			}
-		}
-	}
-
-	return err != target;
-}
-
-static bool cdv_intel_find_dp_pll(const struct cdv_intel_limit_t *limit, struct drm_crtc *crtc, int target,
-				int refclk,
-				struct cdv_intel_clock_t *best_clock)
-{
-	struct cdv_intel_clock_t clock;
+	struct gma_clock_t clock;
 	if (refclk == 27000) {
 		if (target < 200000) {
 			clock.p1 = 2;
@@ -584,85 +447,10 @@
 	clock.p = clock.p1 * clock.p2;
 	clock.vco = (refclk * clock.m) / clock.n;
 	clock.dot = clock.vco / clock.p;
-	memcpy(best_clock, &clock, sizeof(struct cdv_intel_clock_t));
+	memcpy(best_clock, &clock, sizeof(struct gma_clock_t));
 	return true;
 }
 
-static int cdv_intel_pipe_set_base(struct drm_crtc *crtc,
-			    int x, int y, struct drm_framebuffer *old_fb)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	struct psb_framebuffer *psbfb = to_psb_fb(crtc->fb);
-	int pipe = psb_intel_crtc->pipe;
-	const struct psb_offset *map = &dev_priv->regmap[pipe];
-	unsigned long start, offset;
-	u32 dspcntr;
-	int ret = 0;
-
-	if (!gma_power_begin(dev, true))
-		return 0;
-
-	/* no fb bound */
-	if (!crtc->fb) {
-		dev_err(dev->dev, "No FB bound\n");
-		goto psb_intel_pipe_cleaner;
-	}
-
-
-	/* We are displaying this buffer, make sure it is actually loaded
-	   into the GTT */
-	ret = psb_gtt_pin(psbfb->gtt);
-	if (ret < 0)
-		goto psb_intel_pipe_set_base_exit;
-	start = psbfb->gtt->offset;
-	offset = y * crtc->fb->pitches[0] + x * (crtc->fb->bits_per_pixel / 8);
-
-	REG_WRITE(map->stride, crtc->fb->pitches[0]);
-
-	dspcntr = REG_READ(map->cntr);
-	dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
-
-	switch (crtc->fb->bits_per_pixel) {
-	case 8:
-		dspcntr |= DISPPLANE_8BPP;
-		break;
-	case 16:
-		if (crtc->fb->depth == 15)
-			dspcntr |= DISPPLANE_15_16BPP;
-		else
-			dspcntr |= DISPPLANE_16BPP;
-		break;
-	case 24:
-	case 32:
-		dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
-		break;
-	default:
-		dev_err(dev->dev, "Unknown color depth\n");
-		ret = -EINVAL;
-		goto psb_intel_pipe_set_base_exit;
-	}
-	REG_WRITE(map->cntr, dspcntr);
-
-	dev_dbg(dev->dev,
-		"Writing base %08lX %08lX %d %d\n", start, offset, x, y);
-
-	REG_WRITE(map->base, offset);
-	REG_READ(map->base);
-	REG_WRITE(map->surf, start);
-	REG_READ(map->surf);
-
-psb_intel_pipe_cleaner:
-	/* If there was a previous display we can now unpin it */
-	if (old_fb)
-		psb_gtt_unpin(to_psb_fb(old_fb)->gtt);
-
-psb_intel_pipe_set_base_exit:
-	gma_power_end(dev);
-	return ret;
-}
-
 #define		FIFO_PIPEA		(1 << 0)
 #define		FIFO_PIPEB		(1 << 1)
 
@@ -670,12 +458,12 @@
 {
 	struct drm_crtc *crtc;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = NULL;
+	struct gma_crtc *gma_crtc = NULL;
 
 	crtc = dev_priv->pipe_to_crtc_mapping[pipe];
-	psb_intel_crtc = to_psb_intel_crtc(crtc);
+	gma_crtc = to_gma_crtc(crtc);
 
-	if (crtc->fb == NULL || !psb_intel_crtc->active)
+	if (crtc->fb == NULL || !gma_crtc->active)
 		return false;
 	return true;
 }
@@ -701,29 +489,29 @@
 
 static bool is_pipeb_lvds(struct drm_device *dev, struct drm_crtc *crtc)
 {
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct drm_connector *connector;
 
-	if (psb_intel_crtc->pipe != 1)
+	if (gma_crtc->pipe != 1)
 		return false;
 
 	list_for_each_entry(connector, &mode_config->connector_list, head) {
-		struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+		struct gma_encoder *gma_encoder =
+					gma_attached_encoder(connector);
 
 		if (!connector->encoder
 		    || connector->encoder->crtc != crtc)
 			continue;
 
-		if (psb_intel_encoder->type == INTEL_OUTPUT_LVDS)
+		if (gma_encoder->type == INTEL_OUTPUT_LVDS)
 			return true;
 	}
 
 	return false;
 }
 
-static void cdv_intel_disable_self_refresh (struct drm_device *dev)
+void cdv_disable_sr(struct drm_device *dev)
 {
 	if (REG_READ(FW_BLC_SELF) & FW_BLC_SELF_EN) {
 
@@ -731,7 +519,7 @@
 		REG_WRITE(FW_BLC_SELF, (REG_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN));
 		REG_READ(FW_BLC_SELF);
 
-		cdv_intel_wait_for_vblank(dev);
+		gma_wait_for_vblank(dev);
 
 		/* Cedarview workaround to write ovelay plane, which force to leave
 		 * MAX_FIFO state.
@@ -739,13 +527,14 @@
 		REG_WRITE(OV_OVADD, 0/*dev_priv->ovl_offset*/);
 		REG_READ(OV_OVADD);
 
-		cdv_intel_wait_for_vblank(dev);
+		gma_wait_for_vblank(dev);
 	}
 
 }
 
-static void cdv_intel_update_watermark (struct drm_device *dev, struct drm_crtc *crtc)
+void cdv_update_wm(struct drm_device *dev, struct drm_crtc *crtc)
 {
+	struct drm_psb_private *dev_priv = dev->dev_private;
 
 	if (cdv_intel_single_pipe_active(dev)) {
 		u32 fw;
@@ -780,12 +569,12 @@
 
 		REG_WRITE(DSPFW6, 0x10);
 
-		cdv_intel_wait_for_vblank(dev);
+		gma_wait_for_vblank(dev);
 
 		/* enable self-refresh for single pipe active */
 		REG_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
 		REG_READ(FW_BLC_SELF);
-		cdv_intel_wait_for_vblank(dev);
+		gma_wait_for_vblank(dev);
 
 	} else {
 
@@ -797,216 +586,12 @@
 		REG_WRITE(DSPFW5, 0x01010101);
 		REG_WRITE(DSPFW6, 0x1d0);
 
-		cdv_intel_wait_for_vblank(dev);
+		gma_wait_for_vblank(dev);
 
-		cdv_intel_disable_self_refresh(dev);
-	
+		dev_priv->ops->disable_sr(dev);
 	}
 }
 
-/** Loads the palette/gamma unit for the CRTC with the prepared values */
-static void cdv_intel_crtc_load_lut(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int palreg = PALETTE_A;
-	int i;
-
-	/* The clocks have to be on to load the palette. */
-	if (!crtc->enabled)
-		return;
-
-	switch (psb_intel_crtc->pipe) {
-	case 0:
-		break;
-	case 1:
-		palreg = PALETTE_B;
-		break;
-	case 2:
-		palreg = PALETTE_C;
-		break;
-	default:
-		dev_err(dev->dev, "Illegal Pipe Number.\n");
-		return;
-	}
-
-	if (gma_power_begin(dev, false)) {
-		for (i = 0; i < 256; i++) {
-			REG_WRITE(palreg + 4 * i,
-				  ((psb_intel_crtc->lut_r[i] +
-				  psb_intel_crtc->lut_adj[i]) << 16) |
-				  ((psb_intel_crtc->lut_g[i] +
-				  psb_intel_crtc->lut_adj[i]) << 8) |
-				  (psb_intel_crtc->lut_b[i] +
-				  psb_intel_crtc->lut_adj[i]));
-		}
-		gma_power_end(dev);
-	} else {
-		for (i = 0; i < 256; i++) {
-			dev_priv->regs.pipe[0].palette[i] =
-				  ((psb_intel_crtc->lut_r[i] +
-				  psb_intel_crtc->lut_adj[i]) << 16) |
-				  ((psb_intel_crtc->lut_g[i] +
-				  psb_intel_crtc->lut_adj[i]) << 8) |
-				  (psb_intel_crtc->lut_b[i] +
-				  psb_intel_crtc->lut_adj[i]);
-		}
-
-	}
-}
-
-/**
- * Sets the power management mode of the pipe and plane.
- *
- * This code should probably grow support for turning the cursor off and back
- * on appropriately at the same time as we're turning the pipe off/on.
- */
-static void cdv_intel_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
-	const struct psb_offset *map = &dev_priv->regmap[pipe];
-	u32 temp;
-
-	/* XXX: When our outputs are all unaware of DPMS modes other than off
-	 * and on, we should map those modes to DRM_MODE_DPMS_OFF in the CRTC.
-	 */
-	cdv_intel_disable_self_refresh(dev);
-
-	switch (mode) {
-	case DRM_MODE_DPMS_ON:
-	case DRM_MODE_DPMS_STANDBY:
-	case DRM_MODE_DPMS_SUSPEND:
-		if (psb_intel_crtc->active)
-			break;
-
-		psb_intel_crtc->active = true;
-
-		/* Enable the DPLL */
-		temp = REG_READ(map->dpll);
-		if ((temp & DPLL_VCO_ENABLE) == 0) {
-			REG_WRITE(map->dpll, temp);
-			REG_READ(map->dpll);
-			/* Wait for the clocks to stabilize. */
-			udelay(150);
-			REG_WRITE(map->dpll, temp | DPLL_VCO_ENABLE);
-			REG_READ(map->dpll);
-			/* Wait for the clocks to stabilize. */
-			udelay(150);
-			REG_WRITE(map->dpll, temp | DPLL_VCO_ENABLE);
-			REG_READ(map->dpll);
-			/* Wait for the clocks to stabilize. */
-			udelay(150);
-		}
-
-		/* Jim Bish - switch plan and pipe per scott */
-		/* Enable the plane */
-		temp = REG_READ(map->cntr);
-		if ((temp & DISPLAY_PLANE_ENABLE) == 0) {
-			REG_WRITE(map->cntr,
-				  temp | DISPLAY_PLANE_ENABLE);
-			/* Flush the plane changes */
-			REG_WRITE(map->base, REG_READ(map->base));
-		}
-
-		udelay(150);
-
-		/* Enable the pipe */
-		temp = REG_READ(map->conf);
-		if ((temp & PIPEACONF_ENABLE) == 0)
-			REG_WRITE(map->conf, temp | PIPEACONF_ENABLE);
-
-		temp = REG_READ(map->status);
-		temp &= ~(0xFFFF);
-		temp |= PIPE_FIFO_UNDERRUN;
-		REG_WRITE(map->status, temp);
-		REG_READ(map->status);
-
-		cdv_intel_crtc_load_lut(crtc);
-
-		/* Give the overlay scaler a chance to enable
-		 * if it's on this pipe */
-		/* psb_intel_crtc_dpms_video(crtc, true); TODO */
-		break;
-	case DRM_MODE_DPMS_OFF:
-		if (!psb_intel_crtc->active)
-			break;
-
-		psb_intel_crtc->active = false;
-
-		/* Give the overlay scaler a chance to disable
-		 * if it's on this pipe */
-		/* psb_intel_crtc_dpms_video(crtc, FALSE); TODO */
-
-		/* Disable the VGA plane that we never use */
-		REG_WRITE(VGACNTRL, VGA_DISP_DISABLE);
-
-		/* Jim Bish - changed pipe/plane here as well. */
-
-		drm_vblank_off(dev, pipe);
-		/* Wait for vblank for the disable to take effect */
-		cdv_intel_wait_for_vblank(dev);
-
-		/* Next, disable display pipes */
-		temp = REG_READ(map->conf);
-		if ((temp & PIPEACONF_ENABLE) != 0) {
-			REG_WRITE(map->conf, temp & ~PIPEACONF_ENABLE);
-			REG_READ(map->conf);
-		}
-
-		/* Wait for vblank for the disable to take effect. */
-		cdv_intel_wait_for_vblank(dev);
-
-		udelay(150);
-
-		/* Disable display plane */
-		temp = REG_READ(map->cntr);
-		if ((temp & DISPLAY_PLANE_ENABLE) != 0) {
-			REG_WRITE(map->cntr,
-				  temp & ~DISPLAY_PLANE_ENABLE);
-			/* Flush the plane changes */
-			REG_WRITE(map->base, REG_READ(map->base));
-			REG_READ(map->base);
-		}
-
-		temp = REG_READ(map->dpll);
-		if ((temp & DPLL_VCO_ENABLE) != 0) {
-			REG_WRITE(map->dpll, temp & ~DPLL_VCO_ENABLE);
-			REG_READ(map->dpll);
-		}
-
-		/* Wait for the clocks to turn off. */
-		udelay(150);
-		break;
-	}
-	cdv_intel_update_watermark(dev, crtc);
-	/*Set FIFO Watermarks*/
-	REG_WRITE(DSPARB, 0x3F3E);
-}
-
-static void cdv_intel_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void cdv_intel_crtc_commit(struct drm_crtc *crtc)
-{
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
-}
-
-static bool cdv_intel_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
-
 /**
  * Return the pipe currently connected to the panel fitter,
  * or -1 if the panel fitter is not present or not in use
@@ -1031,31 +616,31 @@
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	int refclk;
-	struct cdv_intel_clock_t clock;
+	struct gma_clock_t clock;
 	u32 dpll = 0, dspcntr, pipeconf;
 	bool ok;
 	bool is_crt = false, is_lvds = false, is_tv = false;
 	bool is_hdmi = false, is_dp = false;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct drm_connector *connector;
-	const struct cdv_intel_limit_t *limit;
+	const struct gma_limit_t *limit;
 	u32 ddi_select = 0;
 	bool is_edp = false;
 
 	list_for_each_entry(connector, &mode_config->connector_list, head) {
-		struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+		struct gma_encoder *gma_encoder =
+					gma_attached_encoder(connector);
 
 		if (!connector->encoder
 		    || connector->encoder->crtc != crtc)
 			continue;
 
-		ddi_select = psb_intel_encoder->ddi_select;
-		switch (psb_intel_encoder->type) {
+		ddi_select = gma_encoder->ddi_select;
+		switch (gma_encoder->type) {
 		case INTEL_OUTPUT_LVDS:
 			is_lvds = true;
 			break;
@@ -1108,12 +693,13 @@
 
 	drm_mode_debug_printmodeline(adjusted_mode);
 	
-	limit = cdv_intel_limit(crtc, refclk);
+	limit = gma_crtc->clock_funcs->limit(crtc, refclk);
 
 	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk,
 				 &clock);
 	if (!ok) {
-		dev_err(dev->dev, "Couldn't find PLL settings for mode!\n");
+		DRM_ERROR("Couldn't find PLL settings for mode! target: %d, actual: %d",
+			  adjusted_mode->clock, clock.dot);
 		return 0;
 	}
 
@@ -1264,7 +850,7 @@
 	REG_WRITE(map->conf, pipeconf);
 	REG_READ(map->conf);
 
-	cdv_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 	REG_WRITE(map->cntr, dspcntr);
 
@@ -1275,344 +861,16 @@
 		crtc_funcs->mode_set_base(crtc, x, y, old_fb);
 	}
 
-	cdv_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 	return 0;
 }
 
-
-/**
- * Save HW states of giving crtc
- */
-static void cdv_intel_crtc_save(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	struct psb_intel_crtc_state *crtc_state = psb_intel_crtc->crtc_state;
-	const struct psb_offset *map = &dev_priv->regmap[psb_intel_crtc->pipe];
-	uint32_t paletteReg;
-	int i;
-
-	if (!crtc_state) {
-		dev_dbg(dev->dev, "No CRTC state found\n");
-		return;
-	}
-
-	crtc_state->saveDSPCNTR = REG_READ(map->cntr);
-	crtc_state->savePIPECONF = REG_READ(map->conf);
-	crtc_state->savePIPESRC = REG_READ(map->src);
-	crtc_state->saveFP0 = REG_READ(map->fp0);
-	crtc_state->saveFP1 = REG_READ(map->fp1);
-	crtc_state->saveDPLL = REG_READ(map->dpll);
-	crtc_state->saveHTOTAL = REG_READ(map->htotal);
-	crtc_state->saveHBLANK = REG_READ(map->hblank);
-	crtc_state->saveHSYNC = REG_READ(map->hsync);
-	crtc_state->saveVTOTAL = REG_READ(map->vtotal);
-	crtc_state->saveVBLANK = REG_READ(map->vblank);
-	crtc_state->saveVSYNC = REG_READ(map->vsync);
-	crtc_state->saveDSPSTRIDE = REG_READ(map->stride);
-
-	/*NOTE: DSPSIZE DSPPOS only for psb*/
-	crtc_state->saveDSPSIZE = REG_READ(map->size);
-	crtc_state->saveDSPPOS = REG_READ(map->pos);
-
-	crtc_state->saveDSPBASE = REG_READ(map->base);
-
-	DRM_DEBUG("(%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x)\n",
-			crtc_state->saveDSPCNTR,
-			crtc_state->savePIPECONF,
-			crtc_state->savePIPESRC,
-			crtc_state->saveFP0,
-			crtc_state->saveFP1,
-			crtc_state->saveDPLL,
-			crtc_state->saveHTOTAL,
-			crtc_state->saveHBLANK,
-			crtc_state->saveHSYNC,
-			crtc_state->saveVTOTAL,
-			crtc_state->saveVBLANK,
-			crtc_state->saveVSYNC,
-			crtc_state->saveDSPSTRIDE,
-			crtc_state->saveDSPSIZE,
-			crtc_state->saveDSPPOS,
-			crtc_state->saveDSPBASE
-		);
-
-	paletteReg = map->palette;
-	for (i = 0; i < 256; ++i)
-		crtc_state->savePalette[i] = REG_READ(paletteReg + (i << 2));
-}
-
-/**
- * Restore HW states of giving crtc
- */
-static void cdv_intel_crtc_restore(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc =  to_psb_intel_crtc(crtc);
-	struct psb_intel_crtc_state *crtc_state = psb_intel_crtc->crtc_state;
-	const struct psb_offset *map = &dev_priv->regmap[psb_intel_crtc->pipe];
-	uint32_t paletteReg;
-	int i;
-
-	if (!crtc_state) {
-		dev_dbg(dev->dev, "No crtc state\n");
-		return;
-	}
-
-	DRM_DEBUG(
-		"current:(%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x)\n",
-		REG_READ(map->cntr),
-		REG_READ(map->conf),
-		REG_READ(map->src),
-		REG_READ(map->fp0),
-		REG_READ(map->fp1),
-		REG_READ(map->dpll),
-		REG_READ(map->htotal),
-		REG_READ(map->hblank),
-		REG_READ(map->hsync),
-		REG_READ(map->vtotal),
-		REG_READ(map->vblank),
-		REG_READ(map->vsync),
-		REG_READ(map->stride),
-		REG_READ(map->size),
-		REG_READ(map->pos),
-		REG_READ(map->base)
-	);
-
-	DRM_DEBUG(
-		"saved: (%x %x %x %x %x %x %x %x %x %x %x %x %x %x %x %x)\n",
-		crtc_state->saveDSPCNTR,
-		crtc_state->savePIPECONF,
-		crtc_state->savePIPESRC,
-		crtc_state->saveFP0,
-		crtc_state->saveFP1,
-		crtc_state->saveDPLL,
-		crtc_state->saveHTOTAL,
-		crtc_state->saveHBLANK,
-		crtc_state->saveHSYNC,
-		crtc_state->saveVTOTAL,
-		crtc_state->saveVBLANK,
-		crtc_state->saveVSYNC,
-		crtc_state->saveDSPSTRIDE,
-		crtc_state->saveDSPSIZE,
-		crtc_state->saveDSPPOS,
-		crtc_state->saveDSPBASE
-	);
-
-
-	if (crtc_state->saveDPLL & DPLL_VCO_ENABLE) {
-		REG_WRITE(map->dpll,
-				crtc_state->saveDPLL & ~DPLL_VCO_ENABLE);
-		REG_READ(map->dpll);
-		DRM_DEBUG("write dpll: %x\n",
-				REG_READ(map->dpll));
-		udelay(150);
-	}
-
-	REG_WRITE(map->fp0, crtc_state->saveFP0);
-	REG_READ(map->fp0);
-
-	REG_WRITE(map->fp1, crtc_state->saveFP1);
-	REG_READ(map->fp1);
-
-	REG_WRITE(map->dpll, crtc_state->saveDPLL);
-	REG_READ(map->dpll);
-	udelay(150);
-
-	REG_WRITE(map->htotal, crtc_state->saveHTOTAL);
-	REG_WRITE(map->hblank, crtc_state->saveHBLANK);
-	REG_WRITE(map->hsync, crtc_state->saveHSYNC);
-	REG_WRITE(map->vtotal, crtc_state->saveVTOTAL);
-	REG_WRITE(map->vblank, crtc_state->saveVBLANK);
-	REG_WRITE(map->vsync, crtc_state->saveVSYNC);
-	REG_WRITE(map->stride, crtc_state->saveDSPSTRIDE);
-
-	REG_WRITE(map->size, crtc_state->saveDSPSIZE);
-	REG_WRITE(map->pos, crtc_state->saveDSPPOS);
-
-	REG_WRITE(map->src, crtc_state->savePIPESRC);
-	REG_WRITE(map->base, crtc_state->saveDSPBASE);
-	REG_WRITE(map->conf, crtc_state->savePIPECONF);
-
-	cdv_intel_wait_for_vblank(dev);
-
-	REG_WRITE(map->cntr, crtc_state->saveDSPCNTR);
-	REG_WRITE(map->base, crtc_state->saveDSPBASE);
-
-	cdv_intel_wait_for_vblank(dev);
-
-	paletteReg = map->palette;
-	for (i = 0; i < 256; ++i)
-		REG_WRITE(paletteReg + (i << 2), crtc_state->savePalette[i]);
-}
-
-static int cdv_intel_crtc_cursor_set(struct drm_crtc *crtc,
-				 struct drm_file *file_priv,
-				 uint32_t handle,
-				 uint32_t width, uint32_t height)
-{
-	struct drm_device *dev = crtc->dev;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
-	uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
-	uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
-	uint32_t temp;
-	size_t addr = 0;
-	struct gtt_range *gt;
-	struct drm_gem_object *obj;
-	int ret = 0;
-
-	/* if we want to turn of the cursor ignore width and height */
-	if (!handle) {
-		/* turn off the cursor */
-		temp = CURSOR_MODE_DISABLE;
-
-		if (gma_power_begin(dev, false)) {
-			REG_WRITE(control, temp);
-			REG_WRITE(base, 0);
-			gma_power_end(dev);
-		}
-
-		/* unpin the old GEM object */
-		if (psb_intel_crtc->cursor_obj) {
-			gt = container_of(psb_intel_crtc->cursor_obj,
-							struct gtt_range, gem);
-			psb_gtt_unpin(gt);
-			drm_gem_object_unreference(psb_intel_crtc->cursor_obj);
-			psb_intel_crtc->cursor_obj = NULL;
-		}
-
-		return 0;
-	}
-
-	/* Currently we only support 64x64 cursors */
-	if (width != 64 || height != 64) {
-		dev_dbg(dev->dev, "we currently only support 64x64 cursors\n");
-		return -EINVAL;
-	}
-
-	obj = drm_gem_object_lookup(dev, file_priv, handle);
-	if (!obj)
-		return -ENOENT;
-
-	if (obj->size < width * height * 4) {
-		dev_dbg(dev->dev, "buffer is to small\n");
-		ret = -ENOMEM;
-		goto unref_cursor;
-	}
-
-	gt = container_of(obj, struct gtt_range, gem);
-
-	/* Pin the memory into the GTT */
-	ret = psb_gtt_pin(gt);
-	if (ret) {
-		dev_err(dev->dev, "Can not pin down handle 0x%x\n", handle);
-		goto unref_cursor;
-	}
-
-	addr = gt->offset;	/* Or resource.start ??? */
-
-	psb_intel_crtc->cursor_addr = addr;
-
-	temp = 0;
-	/* set the pipe for the cursor */
-	temp |= (pipe << 28);
-	temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
-
-	if (gma_power_begin(dev, false)) {
-		REG_WRITE(control, temp);
-		REG_WRITE(base, addr);
-		gma_power_end(dev);
-	}
-
-	/* unpin the old GEM object */
-	if (psb_intel_crtc->cursor_obj) {
-		gt = container_of(psb_intel_crtc->cursor_obj,
-							struct gtt_range, gem);
-		psb_gtt_unpin(gt);
-		drm_gem_object_unreference(psb_intel_crtc->cursor_obj);
-	}
-
-	psb_intel_crtc->cursor_obj = obj;
-	return ret;
-
-unref_cursor:
-	drm_gem_object_unreference(obj);
-	return ret;
-}
-
-static int cdv_intel_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
-{
-	struct drm_device *dev = crtc->dev;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
-	uint32_t temp = 0;
-	uint32_t adder;
-
-
-	if (x < 0) {
-		temp |= (CURSOR_POS_SIGN << CURSOR_X_SHIFT);
-		x = -x;
-	}
-	if (y < 0) {
-		temp |= (CURSOR_POS_SIGN << CURSOR_Y_SHIFT);
-		y = -y;
-	}
-
-	temp |= ((x & CURSOR_POS_MASK) << CURSOR_X_SHIFT);
-	temp |= ((y & CURSOR_POS_MASK) << CURSOR_Y_SHIFT);
-
-	adder = psb_intel_crtc->cursor_addr;
-
-	if (gma_power_begin(dev, false)) {
-		REG_WRITE((pipe == 0) ? CURAPOS : CURBPOS, temp);
-		REG_WRITE((pipe == 0) ? CURABASE : CURBBASE, adder);
-		gma_power_end(dev);
-	}
-	return 0;
-}
-
-static void cdv_intel_crtc_gamma_set(struct drm_crtc *crtc, u16 *red,
-			 u16 *green, u16 *blue, uint32_t start, uint32_t size)
-{
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int i;
-	int end = (start + size > 256) ? 256 : start + size;
-
-	for (i = start; i < end; i++) {
-		psb_intel_crtc->lut_r[i] = red[i] >> 8;
-		psb_intel_crtc->lut_g[i] = green[i] >> 8;
-		psb_intel_crtc->lut_b[i] = blue[i] >> 8;
-	}
-
-	cdv_intel_crtc_load_lut(crtc);
-}
-
-static int cdv_crtc_set_config(struct drm_mode_set *set)
-{
-	int ret = 0;
-	struct drm_device *dev = set->crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-
-	if (!dev_priv->rpm_enabled)
-		return drm_crtc_helper_set_config(set);
-
-	pm_runtime_forbid(&dev->pdev->dev);
-
-	ret = drm_crtc_helper_set_config(set);
-
-	pm_runtime_allow(&dev->pdev->dev);
-
-	return ret;
-}
-
 /** Derive the pixel clock for the given refclk and divisors for 8xx chips. */
 
 /* FIXME: why are we using this, should it be cdv_ in this tree ? */
 
-static void i8xx_clock(int refclk, struct cdv_intel_clock_t *clock)
+static void i8xx_clock(int refclk, struct gma_clock_t *clock)
 {
 	clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2);
 	clock->p = clock->p1 * clock->p2;
@@ -1625,12 +883,12 @@
 				struct drm_crtc *crtc)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	u32 dpll;
 	u32 fp;
-	struct cdv_intel_clock_t clock;
+	struct gma_clock_t clock;
 	bool is_lvds;
 	struct psb_pipe *p = &dev_priv->regs.pipe[pipe];
 
@@ -1703,8 +961,8 @@
 struct drm_display_mode *cdv_intel_crtc_mode_get(struct drm_device *dev,
 					     struct drm_crtc *crtc)
 {
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct psb_pipe *p = &dev_priv->regs.pipe[pipe];
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
@@ -1747,44 +1005,28 @@
 	return mode;
 }
 
-static void cdv_intel_crtc_destroy(struct drm_crtc *crtc)
-{
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-
-	kfree(psb_intel_crtc->crtc_state);
-	drm_crtc_cleanup(crtc);
-	kfree(psb_intel_crtc);
-}
-
-static void cdv_intel_crtc_disable(struct drm_crtc *crtc)
-{
-	struct gtt_range *gt;
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
-
-	if (crtc->fb) {
-		gt = to_psb_fb(crtc->fb)->gtt;
-		psb_gtt_unpin(gt);
-	}
-}
-
 const struct drm_crtc_helper_funcs cdv_intel_helper_funcs = {
-	.dpms = cdv_intel_crtc_dpms,
-	.mode_fixup = cdv_intel_crtc_mode_fixup,
+	.dpms = gma_crtc_dpms,
+	.mode_fixup = gma_crtc_mode_fixup,
 	.mode_set = cdv_intel_crtc_mode_set,
-	.mode_set_base = cdv_intel_pipe_set_base,
-	.prepare = cdv_intel_crtc_prepare,
-	.commit = cdv_intel_crtc_commit,
-	.disable = cdv_intel_crtc_disable,
+	.mode_set_base = gma_pipe_set_base,
+	.prepare = gma_crtc_prepare,
+	.commit = gma_crtc_commit,
+	.disable = gma_crtc_disable,
 };
 
 const struct drm_crtc_funcs cdv_intel_crtc_funcs = {
-	.save = cdv_intel_crtc_save,
-	.restore = cdv_intel_crtc_restore,
-	.cursor_set = cdv_intel_crtc_cursor_set,
-	.cursor_move = cdv_intel_crtc_cursor_move,
-	.gamma_set = cdv_intel_crtc_gamma_set,
-	.set_config = cdv_crtc_set_config,
-	.destroy = cdv_intel_crtc_destroy,
+	.save = gma_crtc_save,
+	.restore = gma_crtc_restore,
+	.cursor_set = gma_crtc_cursor_set,
+	.cursor_move = gma_crtc_cursor_move,
+	.gamma_set = gma_crtc_gamma_set,
+	.set_config = gma_crtc_set_config,
+	.destroy = gma_crtc_destroy,
+};
+
+const struct gma_clock_funcs cdv_clock_funcs = {
+	.clock = cdv_intel_clock,
+	.limit = cdv_intel_limit,
+	.pll_is_valid = gma_pll_is_valid,
 };
diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c
index 88d9ef6..f4eb435 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_dp.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c
@@ -34,6 +34,7 @@
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
+#include "gma_display.h"
 #include <drm/drm_dp_helper.h>
 
 #define _wait_for(COND, MS, W) ({ \
@@ -68,7 +69,7 @@
 	uint8_t link_bw;
 	uint8_t lane_count;
 	uint8_t dpcd[4];
-	struct psb_intel_encoder *encoder;
+	struct gma_encoder *encoder;
 	struct i2c_adapter adapter;
 	struct i2c_algo_dp_aux_data algo;
 	uint8_t	train_set[4];
@@ -114,18 +115,18 @@
  * If a CPU or PCH DP output is attached to an eDP panel, this function
  * will return true, and false otherwise.
  */
-static bool is_edp(struct psb_intel_encoder *encoder)
+static bool is_edp(struct gma_encoder *encoder)
 {
 	return encoder->type == INTEL_OUTPUT_EDP;
 }
 
 
-static void cdv_intel_dp_start_link_train(struct psb_intel_encoder *encoder);
-static void cdv_intel_dp_complete_link_train(struct psb_intel_encoder *encoder);
-static void cdv_intel_dp_link_down(struct psb_intel_encoder *encoder);
+static void cdv_intel_dp_start_link_train(struct gma_encoder *encoder);
+static void cdv_intel_dp_complete_link_train(struct gma_encoder *encoder);
+static void cdv_intel_dp_link_down(struct gma_encoder *encoder);
 
 static int
-cdv_intel_dp_max_lane_count(struct psb_intel_encoder *encoder)
+cdv_intel_dp_max_lane_count(struct gma_encoder *encoder)
 {
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	int max_lane_count = 4;
@@ -143,7 +144,7 @@
 }
 
 static int
-cdv_intel_dp_max_link_bw(struct psb_intel_encoder *encoder)
+cdv_intel_dp_max_link_bw(struct gma_encoder *encoder)
 {
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	int max_link_bw = intel_dp->dpcd[DP_MAX_LINK_RATE];
@@ -180,7 +181,7 @@
 	return (max_link_clock * max_lanes * 19) / 20;
 }
 
-static void cdv_intel_edp_panel_vdd_on(struct psb_intel_encoder *intel_encoder)
+static void cdv_intel_edp_panel_vdd_on(struct gma_encoder *intel_encoder)
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct cdv_intel_dp *intel_dp = intel_encoder->dev_priv;
@@ -200,7 +201,7 @@
 	msleep(intel_dp->panel_power_up_delay);
 }
 
-static void cdv_intel_edp_panel_vdd_off(struct psb_intel_encoder *intel_encoder)
+static void cdv_intel_edp_panel_vdd_off(struct gma_encoder *intel_encoder)
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	u32 pp;
@@ -215,7 +216,7 @@
 }
 
 /* Returns true if the panel was already on when called */
-static bool cdv_intel_edp_panel_on(struct psb_intel_encoder *intel_encoder)
+static bool cdv_intel_edp_panel_on(struct gma_encoder *intel_encoder)
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct cdv_intel_dp *intel_dp = intel_encoder->dev_priv;
@@ -242,7 +243,7 @@
 	return false;
 }
 
-static void cdv_intel_edp_panel_off (struct psb_intel_encoder *intel_encoder)
+static void cdv_intel_edp_panel_off (struct gma_encoder *intel_encoder)
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	u32 pp, idle_off_mask = PP_ON ;
@@ -274,7 +275,7 @@
 	DRM_DEBUG_KMS("Over\n");
 }
 
-static void cdv_intel_edp_backlight_on (struct psb_intel_encoder *intel_encoder)
+static void cdv_intel_edp_backlight_on (struct gma_encoder *intel_encoder)
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	u32 pp;
@@ -294,7 +295,7 @@
 	gma_backlight_enable(dev);
 }
 
-static void cdv_intel_edp_backlight_off (struct psb_intel_encoder *intel_encoder)
+static void cdv_intel_edp_backlight_off (struct gma_encoder *intel_encoder)
 {
 	struct drm_device *dev = intel_encoder->base.dev;
 	struct cdv_intel_dp *intel_dp = intel_encoder->dev_priv;
@@ -314,7 +315,7 @@
 cdv_intel_dp_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
-	struct psb_intel_encoder *encoder = psb_intel_attached_encoder(connector);
+	struct gma_encoder *encoder = gma_attached_encoder(connector);
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	int max_link_clock = cdv_intel_dp_link_clock(cdv_intel_dp_max_link_bw(encoder));
 	int max_lanes = cdv_intel_dp_max_lane_count(encoder);
@@ -370,7 +371,7 @@
 }
 
 static int
-cdv_intel_dp_aux_ch(struct psb_intel_encoder *encoder,
+cdv_intel_dp_aux_ch(struct gma_encoder *encoder,
 		uint8_t *send, int send_bytes,
 		uint8_t *recv, int recv_size)
 {
@@ -472,7 +473,7 @@
 
 /* Write data to the aux channel in native mode */
 static int
-cdv_intel_dp_aux_native_write(struct psb_intel_encoder *encoder,
+cdv_intel_dp_aux_native_write(struct gma_encoder *encoder,
 			  uint16_t address, uint8_t *send, int send_bytes)
 {
 	int ret;
@@ -504,7 +505,7 @@
 
 /* Write a single byte to the aux channel in native mode */
 static int
-cdv_intel_dp_aux_native_write_1(struct psb_intel_encoder *encoder,
+cdv_intel_dp_aux_native_write_1(struct gma_encoder *encoder,
 			    uint16_t address, uint8_t byte)
 {
 	return cdv_intel_dp_aux_native_write(encoder, address, &byte, 1);
@@ -512,7 +513,7 @@
 
 /* read bytes from a native aux channel */
 static int
-cdv_intel_dp_aux_native_read(struct psb_intel_encoder *encoder,
+cdv_intel_dp_aux_native_read(struct gma_encoder *encoder,
 			 uint16_t address, uint8_t *recv, int recv_bytes)
 {
 	uint8_t msg[4];
@@ -557,7 +558,7 @@
 	struct cdv_intel_dp *intel_dp = container_of(adapter,
 						struct cdv_intel_dp,
 						adapter);
-	struct psb_intel_encoder *encoder = intel_dp->encoder;
+	struct gma_encoder *encoder = intel_dp->encoder;
 	uint16_t address = algo_data->address;
 	uint8_t msg[5];
 	uint8_t reply[2];
@@ -647,7 +648,8 @@
 }
 
 static int
-cdv_intel_dp_i2c_init(struct psb_intel_connector *connector, struct psb_intel_encoder *encoder, const char *name)
+cdv_intel_dp_i2c_init(struct gma_connector *connector,
+		      struct gma_encoder *encoder, const char *name)
 {
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	int ret;
@@ -698,7 +700,7 @@
 		    struct drm_display_mode *adjusted_mode)
 {
 	struct drm_psb_private *dev_priv = encoder->dev->dev_private;
-	struct psb_intel_encoder *intel_encoder = to_psb_intel_encoder(encoder);
+	struct gma_encoder *intel_encoder = to_gma_encoder(encoder);
 	struct cdv_intel_dp *intel_dp = intel_encoder->dev_priv;
 	int lane_count, clock;
 	int max_lane_count = cdv_intel_dp_max_lane_count(intel_encoder);
@@ -792,22 +794,22 @@
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct drm_encoder *encoder;
-	struct psb_intel_crtc *intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	int lane_count = 4, bpp = 24;
 	struct cdv_intel_dp_m_n m_n;
-	int pipe = intel_crtc->pipe;
+	int pipe = gma_crtc->pipe;
 
 	/*
 	 * Find the lane count in the intel_encoder private
 	 */
 	list_for_each_entry(encoder, &mode_config->encoder_list, head) {
-		struct psb_intel_encoder *intel_encoder;
+		struct gma_encoder *intel_encoder;
 		struct cdv_intel_dp *intel_dp;
 
 		if (encoder->crtc != crtc)
 			continue;
 
-		intel_encoder = to_psb_intel_encoder(encoder);
+		intel_encoder = to_gma_encoder(encoder);
 		intel_dp = intel_encoder->dev_priv;
 		if (intel_encoder->type == INTEL_OUTPUT_DISPLAYPORT) {
 			lane_count = intel_dp->lane_count;
@@ -841,9 +843,9 @@
 cdv_intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		  struct drm_display_mode *adjusted_mode)
 {
-	struct psb_intel_encoder *intel_encoder = to_psb_intel_encoder(encoder);
+	struct gma_encoder *intel_encoder = to_gma_encoder(encoder);
 	struct drm_crtc *crtc = encoder->crtc;
-	struct psb_intel_crtc *intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct cdv_intel_dp *intel_dp = intel_encoder->dev_priv;
 	struct drm_device *dev = encoder->dev;
 
@@ -885,7 +887,7 @@
 	}
 
 	/* CPT DP's pipe select is decided in TRANS_DP_CTL */
-	if (intel_crtc->pipe == 1)
+	if (gma_crtc->pipe == 1)
 		intel_dp->DP |= DP_PIPEB_SELECT;
 
 	REG_WRITE(intel_dp->output_reg, (intel_dp->DP | DP_PORT_EN));
@@ -900,7 +902,7 @@
 		else
 			pfit_control = 0;
 
-		pfit_control |= intel_crtc->pipe << PFIT_PIPE_SHIFT;
+		pfit_control |= gma_crtc->pipe << PFIT_PIPE_SHIFT;
 
 		REG_WRITE(PFIT_CONTROL, pfit_control);
 	}
@@ -908,7 +910,7 @@
 
 
 /* If the sink supports it, try to set the power state appropriately */
-static void cdv_intel_dp_sink_dpms(struct psb_intel_encoder *encoder, int mode)
+static void cdv_intel_dp_sink_dpms(struct gma_encoder *encoder, int mode)
 {
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	int ret, i;
@@ -940,7 +942,7 @@
 
 static void cdv_intel_dp_prepare(struct drm_encoder *encoder)
 {
-	struct psb_intel_encoder *intel_encoder = to_psb_intel_encoder(encoder);
+	struct gma_encoder *intel_encoder = to_gma_encoder(encoder);
 	int edp = is_edp(intel_encoder);
 
 	if (edp) {
@@ -957,7 +959,7 @@
 
 static void cdv_intel_dp_commit(struct drm_encoder *encoder)
 {
-	struct psb_intel_encoder *intel_encoder = to_psb_intel_encoder(encoder);
+	struct gma_encoder *intel_encoder = to_gma_encoder(encoder);
 	int edp = is_edp(intel_encoder);
 
 	if (edp)
@@ -971,7 +973,7 @@
 static void
 cdv_intel_dp_dpms(struct drm_encoder *encoder, int mode)
 {
-	struct psb_intel_encoder *intel_encoder = to_psb_intel_encoder(encoder);
+	struct gma_encoder *intel_encoder = to_gma_encoder(encoder);
 	struct cdv_intel_dp *intel_dp = intel_encoder->dev_priv;
 	struct drm_device *dev = encoder->dev;
 	uint32_t dp_reg = REG_READ(intel_dp->output_reg);
@@ -1006,7 +1008,7 @@
  * cases where the sink may still be asleep.
  */
 static bool
-cdv_intel_dp_aux_native_read_retry(struct psb_intel_encoder *encoder, uint16_t address,
+cdv_intel_dp_aux_native_read_retry(struct gma_encoder *encoder, uint16_t address,
 			       uint8_t *recv, int recv_bytes)
 {
 	int ret, i;
@@ -1031,7 +1033,7 @@
  * link status information
  */
 static bool
-cdv_intel_dp_get_link_status(struct psb_intel_encoder *encoder)
+cdv_intel_dp_get_link_status(struct gma_encoder *encoder)
 {
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	return cdv_intel_dp_aux_native_read_retry(encoder,
@@ -1105,7 +1107,7 @@
 }
 */
 static void
-cdv_intel_get_adjust_train(struct psb_intel_encoder *encoder)
+cdv_intel_get_adjust_train(struct gma_encoder *encoder)
 {
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	uint8_t v = 0;
@@ -1164,7 +1166,7 @@
 			 DP_LANE_CHANNEL_EQ_DONE|\
 			 DP_LANE_SYMBOL_LOCKED)
 static bool
-cdv_intel_channel_eq_ok(struct psb_intel_encoder *encoder)
+cdv_intel_channel_eq_ok(struct gma_encoder *encoder)
 {
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	uint8_t lane_align;
@@ -1184,7 +1186,7 @@
 }
 
 static bool
-cdv_intel_dp_set_link_train(struct psb_intel_encoder *encoder,
+cdv_intel_dp_set_link_train(struct gma_encoder *encoder,
 			uint32_t dp_reg_value,
 			uint8_t dp_train_pat)
 {
@@ -1211,7 +1213,7 @@
 
 
 static bool
-cdv_intel_dplink_set_level(struct psb_intel_encoder *encoder,
+cdv_intel_dplink_set_level(struct gma_encoder *encoder,
 			uint8_t dp_train_pat)
 {
 	
@@ -1232,7 +1234,7 @@
 }
 
 static void
-cdv_intel_dp_set_vswing_premph(struct psb_intel_encoder *encoder, uint8_t signal_level)
+cdv_intel_dp_set_vswing_premph(struct gma_encoder *encoder, uint8_t signal_level)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
@@ -1298,7 +1300,7 @@
 
 /* Enable corresponding port and start training pattern 1 */
 static void
-cdv_intel_dp_start_link_train(struct psb_intel_encoder *encoder)
+cdv_intel_dp_start_link_train(struct gma_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
@@ -1317,7 +1319,7 @@
 	/* Enable output, wait for it to become active */
 	REG_WRITE(intel_dp->output_reg, reg);
 	REG_READ(intel_dp->output_reg);
-	psb_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 	DRM_DEBUG_KMS("Link config\n");
 	/* Write the link configuration data */
@@ -1392,7 +1394,7 @@
 }
 
 static void
-cdv_intel_dp_complete_link_train(struct psb_intel_encoder *encoder)
+cdv_intel_dp_complete_link_train(struct gma_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
@@ -1478,7 +1480,7 @@
 }
 
 static void
-cdv_intel_dp_link_down(struct psb_intel_encoder *encoder)
+cdv_intel_dp_link_down(struct gma_encoder *encoder)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
@@ -1502,8 +1504,7 @@
 	REG_READ(intel_dp->output_reg);
 }
 
-static enum drm_connector_status
-cdv_dp_detect(struct psb_intel_encoder *encoder)
+static enum drm_connector_status cdv_dp_detect(struct gma_encoder *encoder)
 {
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	enum drm_connector_status status;
@@ -1531,7 +1532,7 @@
 static enum drm_connector_status
 cdv_intel_dp_detect(struct drm_connector *connector, bool force)
 {
-	struct psb_intel_encoder *encoder = psb_intel_attached_encoder(connector);
+	struct gma_encoder *encoder = gma_attached_encoder(connector);
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	enum drm_connector_status status;
 	struct edid *edid = NULL;
@@ -1565,7 +1566,7 @@
 
 static int cdv_intel_dp_get_modes(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *intel_encoder = psb_intel_attached_encoder(connector);
+	struct gma_encoder *intel_encoder = gma_attached_encoder(connector);
 	struct cdv_intel_dp *intel_dp = intel_encoder->dev_priv;
 	struct edid *edid = NULL;
 	int ret = 0;
@@ -1621,7 +1622,7 @@
 static bool
 cdv_intel_dp_detect_audio(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *encoder = psb_intel_attached_encoder(connector);
+	struct gma_encoder *encoder = gma_attached_encoder(connector);
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	struct edid *edid;
 	bool has_audio = false;
@@ -1647,7 +1648,7 @@
 		      uint64_t val)
 {
 	struct drm_psb_private *dev_priv = connector->dev->dev_private;
-	struct psb_intel_encoder *encoder = psb_intel_attached_encoder(connector);
+	struct gma_encoder *encoder = gma_attached_encoder(connector);
 	struct cdv_intel_dp *intel_dp = encoder->dev_priv;
 	int ret;
 
@@ -1700,11 +1701,10 @@
 static void
 cdv_intel_dp_destroy(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-	struct cdv_intel_dp *intel_dp = psb_intel_encoder->dev_priv;
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+	struct cdv_intel_dp *intel_dp = gma_encoder->dev_priv;
 
-	if (is_edp(psb_intel_encoder)) {
+	if (is_edp(gma_encoder)) {
 	/*	cdv_intel_panel_destroy_backlight(connector->dev); */
 		if (intel_dp->panel_fixed_mode) {
 			kfree(intel_dp->panel_fixed_mode);
@@ -1741,7 +1741,7 @@
 static const struct drm_connector_helper_funcs cdv_intel_dp_connector_helper_funcs = {
 	.get_modes = cdv_intel_dp_get_modes,
 	.mode_valid = cdv_intel_dp_mode_valid,
-	.best_encoder = psb_intel_best_encoder,
+	.best_encoder = gma_best_encoder,
 };
 
 static const struct drm_encoder_funcs cdv_intel_dp_enc_funcs = {
@@ -1800,19 +1800,19 @@
 void
 cdv_intel_dp_init(struct drm_device *dev, struct psb_intel_mode_device *mode_dev, int output_reg)
 {
-	struct psb_intel_encoder *psb_intel_encoder;
-	struct psb_intel_connector *psb_intel_connector;
+	struct gma_encoder *gma_encoder;
+	struct gma_connector *gma_connector;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
 	struct cdv_intel_dp *intel_dp;
 	const char *name = NULL;
 	int type = DRM_MODE_CONNECTOR_DisplayPort;
 
-	psb_intel_encoder = kzalloc(sizeof(struct psb_intel_encoder), GFP_KERNEL);
-	if (!psb_intel_encoder)
+	gma_encoder = kzalloc(sizeof(struct gma_encoder), GFP_KERNEL);
+	if (!gma_encoder)
 		return;
-        psb_intel_connector = kzalloc(sizeof(struct psb_intel_connector), GFP_KERNEL);
-        if (!psb_intel_connector)
+        gma_connector = kzalloc(sizeof(struct gma_connector), GFP_KERNEL);
+        if (!gma_connector)
                 goto err_connector;
 	intel_dp = kzalloc(sizeof(struct cdv_intel_dp), GFP_KERNEL);
 	if (!intel_dp)
@@ -1821,22 +1821,22 @@
 	if ((output_reg == DP_C) && cdv_intel_dpc_is_edp(dev))
 		type = DRM_MODE_CONNECTOR_eDP;
 
-	connector = &psb_intel_connector->base;
-	encoder = &psb_intel_encoder->base;
+	connector = &gma_connector->base;
+	encoder = &gma_encoder->base;
 
 	drm_connector_init(dev, connector, &cdv_intel_dp_connector_funcs, type);
 	drm_encoder_init(dev, encoder, &cdv_intel_dp_enc_funcs, DRM_MODE_ENCODER_TMDS);
 
-	psb_intel_connector_attach_encoder(psb_intel_connector, psb_intel_encoder);
+	gma_connector_attach_encoder(gma_connector, gma_encoder);
 
 	if (type == DRM_MODE_CONNECTOR_DisplayPort)
-        	psb_intel_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
+		gma_encoder->type = INTEL_OUTPUT_DISPLAYPORT;
         else
-		psb_intel_encoder->type = INTEL_OUTPUT_EDP;
+		gma_encoder->type = INTEL_OUTPUT_EDP;
 
 
-	psb_intel_encoder->dev_priv=intel_dp;
-	intel_dp->encoder = psb_intel_encoder;
+	gma_encoder->dev_priv=intel_dp;
+	intel_dp->encoder = gma_encoder;
 	intel_dp->output_reg = output_reg;
 	
 	drm_encoder_helper_add(encoder, &cdv_intel_dp_helper_funcs);
@@ -1852,21 +1852,21 @@
 	switch (output_reg) {
 		case DP_B:
 			name = "DPDDC-B";
-			psb_intel_encoder->ddi_select = (DP_MASK | DDI0_SELECT);
+			gma_encoder->ddi_select = (DP_MASK | DDI0_SELECT);
 			break;
 		case DP_C:
 			name = "DPDDC-C";
-			psb_intel_encoder->ddi_select = (DP_MASK | DDI1_SELECT);
+			gma_encoder->ddi_select = (DP_MASK | DDI1_SELECT);
 			break;
 	}
 
 	cdv_disable_intel_clock_gating(dev);
 
-	cdv_intel_dp_i2c_init(psb_intel_connector, psb_intel_encoder, name);
+	cdv_intel_dp_i2c_init(gma_connector, gma_encoder, name);
         /* FIXME:fail check */
 	cdv_intel_dp_add_properties(connector);
 
-	if (is_edp(psb_intel_encoder)) {
+	if (is_edp(gma_encoder)) {
 		int ret;
 		struct edp_power_seq cur;
                 u32 pp_on, pp_off, pp_div;
@@ -1920,11 +1920,11 @@
                               intel_dp->backlight_on_delay, intel_dp->backlight_off_delay);
 
 
-		cdv_intel_edp_panel_vdd_on(psb_intel_encoder);
-		ret = cdv_intel_dp_aux_native_read(psb_intel_encoder, DP_DPCD_REV,
+		cdv_intel_edp_panel_vdd_on(gma_encoder);
+		ret = cdv_intel_dp_aux_native_read(gma_encoder, DP_DPCD_REV,
 					       intel_dp->dpcd,
 					       sizeof(intel_dp->dpcd));
-		cdv_intel_edp_panel_vdd_off(psb_intel_encoder);
+		cdv_intel_edp_panel_vdd_off(gma_encoder);
 		if (ret == 0) {
 			/* if this fails, presume the device is a ghost */
 			DRM_INFO("failed to retrieve link info, disabling eDP\n");
@@ -1945,7 +1945,7 @@
 	return;
 
 err_priv:
-	kfree(psb_intel_connector);
+	kfree(gma_connector);
 err_connector:
-	kfree(psb_intel_encoder);
+	kfree(gma_encoder);
 }
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 464153d..1c0d723 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -64,11 +64,11 @@
 			struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = encoder->dev;
-	struct psb_intel_encoder *psb_intel_encoder = to_psb_intel_encoder(encoder);
-	struct mid_intel_hdmi_priv *hdmi_priv = psb_intel_encoder->dev_priv;
+	struct gma_encoder *gma_encoder = to_gma_encoder(encoder);
+	struct mid_intel_hdmi_priv *hdmi_priv = gma_encoder->dev_priv;
 	u32 hdmib;
 	struct drm_crtc *crtc = encoder->crtc;
-	struct psb_intel_crtc *intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 
 	hdmib = (2 << 10);
 
@@ -77,7 +77,7 @@
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
 		hdmib |= HDMI_HSYNC_ACTIVE_HIGH;
 
-	if (intel_crtc->pipe == 1)
+	if (gma_crtc->pipe == 1)
 		hdmib |= HDMIB_PIPE_B_SELECT;
 
 	if (hdmi_priv->has_hdmi_audio) {
@@ -99,9 +99,8 @@
 static void cdv_hdmi_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
-	struct psb_intel_encoder *psb_intel_encoder =
-						to_psb_intel_encoder(encoder);
-	struct mid_intel_hdmi_priv *hdmi_priv = psb_intel_encoder->dev_priv;
+	struct gma_encoder *gma_encoder = to_gma_encoder(encoder);
+	struct mid_intel_hdmi_priv *hdmi_priv = gma_encoder->dev_priv;
 	u32 hdmib;
 
 	hdmib = REG_READ(hdmi_priv->hdmi_reg);
@@ -116,9 +115,8 @@
 static void cdv_hdmi_save(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-	struct mid_intel_hdmi_priv *hdmi_priv = psb_intel_encoder->dev_priv;
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+	struct mid_intel_hdmi_priv *hdmi_priv = gma_encoder->dev_priv;
 
 	hdmi_priv->save_HDMIB = REG_READ(hdmi_priv->hdmi_reg);
 }
@@ -126,9 +124,8 @@
 static void cdv_hdmi_restore(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-	struct mid_intel_hdmi_priv *hdmi_priv = psb_intel_encoder->dev_priv;
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+	struct mid_intel_hdmi_priv *hdmi_priv = gma_encoder->dev_priv;
 
 	REG_WRITE(hdmi_priv->hdmi_reg, hdmi_priv->save_HDMIB);
 	REG_READ(hdmi_priv->hdmi_reg);
@@ -137,13 +134,12 @@
 static enum drm_connector_status cdv_hdmi_detect(
 				struct drm_connector *connector, bool force)
 {
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-	struct mid_intel_hdmi_priv *hdmi_priv = psb_intel_encoder->dev_priv;
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+	struct mid_intel_hdmi_priv *hdmi_priv = gma_encoder->dev_priv;
 	struct edid *edid = NULL;
 	enum drm_connector_status status = connector_status_disconnected;
 
-	edid = drm_get_edid(connector, &psb_intel_encoder->i2c_bus->adapter);
+	edid = drm_get_edid(connector, &gma_encoder->i2c_bus->adapter);
 
 	hdmi_priv->has_hdmi_sink = false;
 	hdmi_priv->has_hdmi_audio = false;
@@ -167,7 +163,7 @@
 	struct drm_encoder *encoder = connector->encoder;
 
 	if (!strcmp(property->name, "scaling mode") && encoder) {
-		struct psb_intel_crtc *crtc = to_psb_intel_crtc(encoder->crtc);
+		struct gma_crtc *crtc = to_gma_crtc(encoder->crtc);
 		bool centre;
 		uint64_t curValue;
 
@@ -221,12 +217,11 @@
  */
 static int cdv_hdmi_get_modes(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 	struct edid *edid = NULL;
 	int ret = 0;
 
-	edid = drm_get_edid(connector, &psb_intel_encoder->i2c_bus->adapter);
+	edid = drm_get_edid(connector, &gma_encoder->i2c_bus->adapter);
 	if (edid) {
 		drm_mode_connector_update_edid_property(connector, edid);
 		ret = drm_add_edid_modes(connector, edid);
@@ -256,11 +251,10 @@
 
 static void cdv_hdmi_destroy(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 
-	if (psb_intel_encoder->i2c_bus)
-		psb_intel_i2c_destroy(psb_intel_encoder->i2c_bus);
+	if (gma_encoder->i2c_bus)
+		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
@@ -269,16 +263,16 @@
 static const struct drm_encoder_helper_funcs cdv_hdmi_helper_funcs = {
 	.dpms = cdv_hdmi_dpms,
 	.mode_fixup = cdv_hdmi_mode_fixup,
-	.prepare = psb_intel_encoder_prepare,
+	.prepare = gma_encoder_prepare,
 	.mode_set = cdv_hdmi_mode_set,
-	.commit = psb_intel_encoder_commit,
+	.commit = gma_encoder_commit,
 };
 
 static const struct drm_connector_helper_funcs
 					cdv_hdmi_connector_helper_funcs = {
 	.get_modes = cdv_hdmi_get_modes,
 	.mode_valid = cdv_hdmi_mode_valid,
-	.best_encoder = psb_intel_best_encoder,
+	.best_encoder = gma_best_encoder,
 };
 
 static const struct drm_connector_funcs cdv_hdmi_connector_funcs = {
@@ -294,23 +288,22 @@
 void cdv_hdmi_init(struct drm_device *dev,
 			struct psb_intel_mode_device *mode_dev, int reg)
 {
-	struct psb_intel_encoder *psb_intel_encoder;
-	struct psb_intel_connector *psb_intel_connector;
+	struct gma_encoder *gma_encoder;
+	struct gma_connector *gma_connector;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
 	struct mid_intel_hdmi_priv *hdmi_priv;
 	int ddc_bus;
 
-	psb_intel_encoder = kzalloc(sizeof(struct psb_intel_encoder),
-				    GFP_KERNEL);
+	gma_encoder = kzalloc(sizeof(struct gma_encoder), GFP_KERNEL);
 
-	if (!psb_intel_encoder)
+	if (!gma_encoder)
 		return;
 
-	psb_intel_connector = kzalloc(sizeof(struct psb_intel_connector),
+	gma_connector = kzalloc(sizeof(struct gma_connector),
 				      GFP_KERNEL);
 
-	if (!psb_intel_connector)
+	if (!gma_connector)
 		goto err_connector;
 
 	hdmi_priv = kzalloc(sizeof(struct mid_intel_hdmi_priv), GFP_KERNEL);
@@ -318,9 +311,9 @@
 	if (!hdmi_priv)
 		goto err_priv;
 
-	connector = &psb_intel_connector->base;
+	connector = &gma_connector->base;
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
-	encoder = &psb_intel_encoder->base;
+	encoder = &gma_encoder->base;
 	drm_connector_init(dev, connector,
 			   &cdv_hdmi_connector_funcs,
 			   DRM_MODE_CONNECTOR_DVID);
@@ -328,12 +321,11 @@
 	drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs,
 			 DRM_MODE_ENCODER_TMDS);
 
-	psb_intel_connector_attach_encoder(psb_intel_connector,
-					   psb_intel_encoder);
-	psb_intel_encoder->type = INTEL_OUTPUT_HDMI;
+	gma_connector_attach_encoder(gma_connector, gma_encoder);
+	gma_encoder->type = INTEL_OUTPUT_HDMI;
 	hdmi_priv->hdmi_reg = reg;
 	hdmi_priv->has_hdmi_sink = false;
-	psb_intel_encoder->dev_priv = hdmi_priv;
+	gma_encoder->dev_priv = hdmi_priv;
 
 	drm_encoder_helper_add(encoder, &cdv_hdmi_helper_funcs);
 	drm_connector_helper_add(connector,
@@ -349,11 +341,11 @@
 	switch (reg) {
 	case SDVOB:
 		ddc_bus = GPIOE;
-		psb_intel_encoder->ddi_select = DDI0_SELECT;
+		gma_encoder->ddi_select = DDI0_SELECT;
 		break;
 	case SDVOC:
 		ddc_bus = GPIOD;
-		psb_intel_encoder->ddi_select = DDI1_SELECT;
+		gma_encoder->ddi_select = DDI1_SELECT;
 		break;
 	default:
 		DRM_ERROR("unknown reg 0x%x for HDMI\n", reg);
@@ -361,16 +353,15 @@
 		break;
 	}
 
-	psb_intel_encoder->i2c_bus = psb_intel_i2c_create(dev,
+	gma_encoder->i2c_bus = psb_intel_i2c_create(dev,
 				ddc_bus, (reg == SDVOB) ? "HDMIB" : "HDMIC");
 
-	if (!psb_intel_encoder->i2c_bus) {
+	if (!gma_encoder->i2c_bus) {
 		dev_err(dev->dev, "No ddc adapter available!\n");
 		goto failed_ddc;
 	}
 
-	hdmi_priv->hdmi_i2c_adapter =
-				&(psb_intel_encoder->i2c_bus->adapter);
+	hdmi_priv->hdmi_i2c_adapter = &(gma_encoder->i2c_bus->adapter);
 	hdmi_priv->dev = dev;
 	drm_sysfs_connector_add(connector);
 	return;
@@ -379,7 +370,7 @@
 	drm_encoder_cleanup(encoder);
 	drm_connector_cleanup(connector);
 err_priv:
-	kfree(psb_intel_connector);
+	kfree(gma_connector);
 err_connector:
-	kfree(psb_intel_encoder);
+	kfree(gma_encoder);
 }
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index d81dbc3..20e08e6 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -356,8 +356,7 @@
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(
-							encoder->crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(encoder->crtc);
 	u32 pfit_control;
 
 	/*
@@ -379,7 +378,7 @@
 	else
 		pfit_control = 0;
 
-	pfit_control |= psb_intel_crtc->pipe << PFIT_PIPE_SHIFT;
+	pfit_control |= gma_crtc->pipe << PFIT_PIPE_SHIFT;
 
 	if (dev_priv->lvds_dither)
 		pfit_control |= PANEL_8TO6_DITHER_ENABLE;
@@ -407,12 +406,11 @@
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 	struct psb_intel_mode_device *mode_dev = &dev_priv->mode_dev;
 	int ret;
 
-	ret = psb_intel_ddc_get_modes(connector, &psb_intel_encoder->i2c_bus->adapter);
+	ret = psb_intel_ddc_get_modes(connector, &gma_encoder->i2c_bus->adapter);
 
 	if (ret)
 		return ret;
@@ -444,11 +442,10 @@
  */
 static void cdv_intel_lvds_destroy(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 
-	if (psb_intel_encoder->i2c_bus)
-		psb_intel_i2c_destroy(psb_intel_encoder->i2c_bus);
+	if (gma_encoder->i2c_bus)
+		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
@@ -461,8 +458,7 @@
 	struct drm_encoder *encoder = connector->encoder;
 
 	if (!strcmp(property->name, "scaling mode") && encoder) {
-		struct psb_intel_crtc *crtc =
-					to_psb_intel_crtc(encoder->crtc);
+		struct gma_crtc *crtc = to_gma_crtc(encoder->crtc);
 		uint64_t curValue;
 
 		if (!crtc)
@@ -529,7 +525,7 @@
 				cdv_intel_lvds_connector_helper_funcs = {
 	.get_modes = cdv_intel_lvds_get_modes,
 	.mode_valid = cdv_intel_lvds_mode_valid,
-	.best_encoder = psb_intel_best_encoder,
+	.best_encoder = gma_best_encoder,
 };
 
 static const struct drm_connector_funcs cdv_intel_lvds_connector_funcs = {
@@ -612,8 +608,8 @@
 void cdv_intel_lvds_init(struct drm_device *dev,
 		     struct psb_intel_mode_device *mode_dev)
 {
-	struct psb_intel_encoder *psb_intel_encoder;
-	struct psb_intel_connector *psb_intel_connector;
+	struct gma_encoder *gma_encoder;
+	struct gma_connector *gma_connector;
 	struct cdv_intel_lvds_priv *lvds_priv;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
@@ -630,24 +626,24 @@
 		return;
 	}
 
-	psb_intel_encoder = kzalloc(sizeof(struct psb_intel_encoder),
+	gma_encoder = kzalloc(sizeof(struct gma_encoder),
 				    GFP_KERNEL);
-	if (!psb_intel_encoder)
+	if (!gma_encoder)
 		return;
 
-	psb_intel_connector = kzalloc(sizeof(struct psb_intel_connector),
+	gma_connector = kzalloc(sizeof(struct gma_connector),
 				      GFP_KERNEL);
-	if (!psb_intel_connector)
+	if (!gma_connector)
 		goto failed_connector;
 
 	lvds_priv = kzalloc(sizeof(struct cdv_intel_lvds_priv), GFP_KERNEL);
 	if (!lvds_priv)
 		goto failed_lvds_priv;
 
-	psb_intel_encoder->dev_priv = lvds_priv;
+	gma_encoder->dev_priv = lvds_priv;
 
-	connector = &psb_intel_connector->base;
-	encoder = &psb_intel_encoder->base;
+	connector = &gma_connector->base;
+	encoder = &gma_encoder->base;
 
 
 	drm_connector_init(dev, connector,
@@ -659,9 +655,8 @@
 			 DRM_MODE_ENCODER_LVDS);
 
 
-	psb_intel_connector_attach_encoder(psb_intel_connector,
-					   psb_intel_encoder);
-	psb_intel_encoder->type = INTEL_OUTPUT_LVDS;
+	gma_connector_attach_encoder(gma_connector, gma_encoder);
+	gma_encoder->type = INTEL_OUTPUT_LVDS;
 
 	drm_encoder_helper_add(encoder, &cdv_intel_lvds_helper_funcs);
 	drm_connector_helper_add(connector,
@@ -682,16 +677,16 @@
 	 * Set up I2C bus
 	 * FIXME: distroy i2c_bus when exit
 	 */
-	psb_intel_encoder->i2c_bus = psb_intel_i2c_create(dev,
+	gma_encoder->i2c_bus = psb_intel_i2c_create(dev,
 							 GPIOB,
 							 "LVDSBLC_B");
-	if (!psb_intel_encoder->i2c_bus) {
+	if (!gma_encoder->i2c_bus) {
 		dev_printk(KERN_ERR,
 			&dev->pdev->dev, "I2C bus registration failed.\n");
 		goto failed_blc_i2c;
 	}
-	psb_intel_encoder->i2c_bus->slave_addr = 0x2C;
-	dev_priv->lvds_i2c_bus = psb_intel_encoder->i2c_bus;
+	gma_encoder->i2c_bus->slave_addr = 0x2C;
+	dev_priv->lvds_i2c_bus = gma_encoder->i2c_bus;
 
 	/*
 	 * LVDS discovery:
@@ -704,10 +699,10 @@
 	 */
 
 	/* Set up the DDC bus. */
-	psb_intel_encoder->ddc_bus = psb_intel_i2c_create(dev,
+	gma_encoder->ddc_bus = psb_intel_i2c_create(dev,
 							 GPIOC,
 							 "LVDSDDC_C");
-	if (!psb_intel_encoder->ddc_bus) {
+	if (!gma_encoder->ddc_bus) {
 		dev_printk(KERN_ERR, &dev->pdev->dev,
 			   "DDC bus registration " "failed.\n");
 		goto failed_ddc;
@@ -718,7 +713,7 @@
 	 * preferred mode is the right one.
 	 */
 	psb_intel_ddc_get_modes(connector,
-				&psb_intel_encoder->ddc_bus->adapter);
+				&gma_encoder->ddc_bus->adapter);
 	list_for_each_entry(scan, &connector->probed_modes, head) {
 		if (scan->type & DRM_MODE_TYPE_PREFERRED) {
 			mode_dev->panel_fixed_mode =
@@ -782,19 +777,19 @@
 
 failed_find:
 	printk(KERN_ERR "Failed find\n");
-	if (psb_intel_encoder->ddc_bus)
-		psb_intel_i2c_destroy(psb_intel_encoder->ddc_bus);
+	if (gma_encoder->ddc_bus)
+		psb_intel_i2c_destroy(gma_encoder->ddc_bus);
 failed_ddc:
 	printk(KERN_ERR "Failed DDC\n");
-	if (psb_intel_encoder->i2c_bus)
-		psb_intel_i2c_destroy(psb_intel_encoder->i2c_bus);
+	if (gma_encoder->i2c_bus)
+		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 failed_blc_i2c:
 	printk(KERN_ERR "Failed BLC\n");
 	drm_encoder_cleanup(encoder);
 	drm_connector_cleanup(connector);
 	kfree(lvds_priv);
 failed_lvds_priv:
-	kfree(psb_intel_connector);
+	kfree(gma_connector);
 failed_connector:
-	kfree(psb_intel_encoder);
+	kfree(gma_encoder);
 }
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 8b1b6d9..01dd7d2 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -321,10 +321,8 @@
 	/* Begin by trying to use stolen memory backing */
 	backing = psb_gtt_alloc_range(dev, aligned_size, "fb", 1);
 	if (backing) {
-		if (drm_gem_private_object_init(dev,
-					&backing->gem, aligned_size) == 0)
-			return backing;
-		psb_gtt_free_range(dev, backing);
+		drm_gem_private_object_init(dev, &backing->gem, aligned_size);
+		return backing;
 	}
 	return NULL;
 }
@@ -522,21 +520,21 @@
 static void psbfb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
 							u16 blue, int regno)
 {
-	struct psb_intel_crtc *intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 
-	intel_crtc->lut_r[regno] = red >> 8;
-	intel_crtc->lut_g[regno] = green >> 8;
-	intel_crtc->lut_b[regno] = blue >> 8;
+	gma_crtc->lut_r[regno] = red >> 8;
+	gma_crtc->lut_g[regno] = green >> 8;
+	gma_crtc->lut_b[regno] = blue >> 8;
 }
 
 static void psbfb_gamma_get(struct drm_crtc *crtc, u16 *red,
 					u16 *green, u16 *blue, int regno)
 {
-	struct psb_intel_crtc *intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 
-	*red = intel_crtc->lut_r[regno] << 8;
-	*green = intel_crtc->lut_g[regno] << 8;
-	*blue = intel_crtc->lut_b[regno] << 8;
+	*red = gma_crtc->lut_r[regno] << 8;
+	*green = gma_crtc->lut_g[regno] << 8;
+	*blue = gma_crtc->lut_b[regno] << 8;
 }
 
 static int psbfb_probe(struct drm_fb_helper *helper,
@@ -705,13 +703,12 @@
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list,
 			    head) {
-		struct psb_intel_encoder *psb_intel_encoder =
-			psb_intel_attached_encoder(connector);
-		struct drm_encoder *encoder = &psb_intel_encoder->base;
+		struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+		struct drm_encoder *encoder = &gma_encoder->base;
 		int crtc_mask = 0, clone_mask = 0;
 
 		/* valid crtcs */
-		switch (psb_intel_encoder->type) {
+		switch (gma_encoder->type) {
 		case INTEL_OUTPUT_ANALOG:
 			crtc_mask = (1 << 0);
 			clone_mask = (1 << INTEL_OUTPUT_ANALOG);
@@ -746,7 +743,7 @@
 		}
 		encoder->possible_crtcs = crtc_mask;
 		encoder->possible_clones =
-		    psb_intel_connector_clones(dev, clone_mask);
+		    gma_connector_clones(dev, clone_mask);
 	}
 }
 
diff --git a/drivers/gpu/drm/gma500/framebuffer.h b/drivers/gpu/drm/gma500/framebuffer.h
index 989558a..395f20b 100644
--- a/drivers/gpu/drm/gma500/framebuffer.h
+++ b/drivers/gpu/drm/gma500/framebuffer.h
@@ -41,7 +41,7 @@
 
 #define to_psb_fb(x) container_of(x, struct psb_framebuffer, base)
 
-extern int psb_intel_connector_clones(struct drm_device *dev, int type_mask);
+extern int gma_connector_clones(struct drm_device *dev, int type_mask);
 
 #endif
 
diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c
index eefd6cc..10ae8c5 100644
--- a/drivers/gpu/drm/gma500/gem.c
+++ b/drivers/gpu/drm/gma500/gem.c
@@ -26,6 +26,7 @@
 #include <drm/drmP.h>
 #include <drm/drm.h>
 #include <drm/gma_drm.h>
+#include <drm/drm_vma_manager.h>
 #include "psb_drv.h"
 
 int psb_gem_init_object(struct drm_gem_object *obj)
@@ -38,8 +39,7 @@
 	struct gtt_range *gtt = container_of(obj, struct gtt_range, gem);
 
 	/* Remove the list map if one is present */
-	if (obj->map_list.map)
-		drm_gem_free_mmap_offset(obj);
+	drm_gem_free_mmap_offset(obj);
 	drm_gem_object_release(obj);
 
 	/* This must occur last as it frees up the memory of the GEM object */
@@ -81,13 +81,10 @@
 	/* What validation is needed here ? */
 
 	/* Make it mmapable */
-	if (!obj->map_list.map) {
-		ret = drm_gem_create_mmap_offset(obj);
-		if (ret)
-			goto out;
-	}
-	/* GEM should really work out the hash offsets for us */
-	*offset = (u64)obj->map_list.hash.key << PAGE_SHIFT;
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret)
+		goto out;
+	*offset = drm_vma_node_offset_addr(&obj->vma_node);
 out:
 	drm_gem_object_unreference(obj);
 unlock:
@@ -165,23 +162,6 @@
 }
 
 /**
- *	psb_gem_dumb_destroy	-	destroy a dumb buffer
- *	@file: client file
- *	@dev: our DRM device
- *	@handle: the object handle
- *
- *	Destroy a handle that was created via psb_gem_dumb_create, at least
- *	we hope it was created that way. i915 seems to assume the caller
- *	does the checking but that might be worth review ! FIXME
- */
-int psb_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
-			uint32_t handle)
-{
-	/* No special work needed, drop the reference and see what falls out */
-	return drm_gem_handle_delete(file, handle);
-}
-
-/**
  *	psb_gem_fault		-	pagefault handler for GEM objects
  *	@vma: the VMA of the GEM object
  *	@vmf: fault detail
@@ -261,11 +241,12 @@
 	struct gtt_range *gtt = psb_gtt_alloc_range(dev, size, "gem", 1);
 	if (gtt == NULL)
 		return -ENOMEM;
-	if (drm_gem_private_object_init(dev, &gtt->gem, size) != 0)
-		goto free_gtt;
+
+	drm_gem_private_object_init(dev, &gtt->gem, size);
 	if (drm_gem_handle_create(file, &gtt->gem, handle) == 0)
 		return 0;
-free_gtt:
+
+	drm_gem_object_release(&gtt->gem);
 	psb_gtt_free_range(dev, gtt);
 	return -ENOMEM;
 }
diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
new file mode 100644
index 0000000..24e8af3
--- /dev/null
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -0,0 +1,776 @@
+/*
+ * Copyright © 2006-2011 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ *	Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
+ */
+
+#include <drm/drmP.h>
+#include "gma_display.h"
+#include "psb_intel_drv.h"
+#include "psb_intel_reg.h"
+#include "psb_drv.h"
+#include "framebuffer.h"
+
+/**
+ * Returns whether any output on the specified pipe is of the specified type
+ */
+bool gma_pipe_has_type(struct drm_crtc *crtc, int type)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct drm_connector *l_entry;
+
+	list_for_each_entry(l_entry, &mode_config->connector_list, head) {
+		if (l_entry->encoder && l_entry->encoder->crtc == crtc) {
+			struct gma_encoder *gma_encoder =
+						gma_attached_encoder(l_entry);
+			if (gma_encoder->type == type)
+				return true;
+		}
+	}
+
+	return false;
+}
+
+void gma_wait_for_vblank(struct drm_device *dev)
+{
+	/* Wait for 20ms, i.e. one cycle at 50hz. */
+	mdelay(20);
+}
+
+int gma_pipe_set_base(struct drm_crtc *crtc, int x, int y,
+		      struct drm_framebuffer *old_fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	struct psb_framebuffer *psbfb = to_psb_fb(crtc->fb);
+	int pipe = gma_crtc->pipe;
+	const struct psb_offset *map = &dev_priv->regmap[pipe];
+	unsigned long start, offset;
+	u32 dspcntr;
+	int ret = 0;
+
+	if (!gma_power_begin(dev, true))
+		return 0;
+
+	/* no fb bound */
+	if (!crtc->fb) {
+		dev_err(dev->dev, "No FB bound\n");
+		goto gma_pipe_cleaner;
+	}
+
+	/* We are displaying this buffer, make sure it is actually loaded
+	   into the GTT */
+	ret = psb_gtt_pin(psbfb->gtt);
+	if (ret < 0)
+		goto gma_pipe_set_base_exit;
+	start = psbfb->gtt->offset;
+	offset = y * crtc->fb->pitches[0] + x * (crtc->fb->bits_per_pixel / 8);
+
+	REG_WRITE(map->stride, crtc->fb->pitches[0]);
+
+	dspcntr = REG_READ(map->cntr);
+	dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
+
+	switch (crtc->fb->bits_per_pixel) {
+	case 8:
+		dspcntr |= DISPPLANE_8BPP;
+		break;
+	case 16:
+		if (crtc->fb->depth == 15)
+			dspcntr |= DISPPLANE_15_16BPP;
+		else
+			dspcntr |= DISPPLANE_16BPP;
+		break;
+	case 24:
+	case 32:
+		dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
+		break;
+	default:
+		dev_err(dev->dev, "Unknown color depth\n");
+		ret = -EINVAL;
+		goto gma_pipe_set_base_exit;
+	}
+	REG_WRITE(map->cntr, dspcntr);
+
+	dev_dbg(dev->dev,
+		"Writing base %08lX %08lX %d %d\n", start, offset, x, y);
+
+	/* FIXME: Investigate whether this really is the base for psb and why
+		  the linear offset is named base for the other chips. map->surf
+		  should be the base and map->linoff the offset for all chips */
+	if (IS_PSB(dev)) {
+		REG_WRITE(map->base, offset + start);
+		REG_READ(map->base);
+	} else {
+		REG_WRITE(map->base, offset);
+		REG_READ(map->base);
+		REG_WRITE(map->surf, start);
+		REG_READ(map->surf);
+	}
+
+gma_pipe_cleaner:
+	/* If there was a previous display we can now unpin it */
+	if (old_fb)
+		psb_gtt_unpin(to_psb_fb(old_fb)->gtt);
+
+gma_pipe_set_base_exit:
+	gma_power_end(dev);
+	return ret;
+}
+
+/* Loads the palette/gamma unit for the CRTC with the prepared values */
+void gma_crtc_load_lut(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	const struct psb_offset *map = &dev_priv->regmap[gma_crtc->pipe];
+	int palreg = map->palette;
+	int i;
+
+	/* The clocks have to be on to load the palette. */
+	if (!crtc->enabled)
+		return;
+
+	if (gma_power_begin(dev, false)) {
+		for (i = 0; i < 256; i++) {
+			REG_WRITE(palreg + 4 * i,
+				  ((gma_crtc->lut_r[i] +
+				  gma_crtc->lut_adj[i]) << 16) |
+				  ((gma_crtc->lut_g[i] +
+				  gma_crtc->lut_adj[i]) << 8) |
+				  (gma_crtc->lut_b[i] +
+				  gma_crtc->lut_adj[i]));
+		}
+		gma_power_end(dev);
+	} else {
+		for (i = 0; i < 256; i++) {
+			/* FIXME: Why pipe[0] and not pipe[..._crtc->pipe]? */
+			dev_priv->regs.pipe[0].palette[i] =
+				  ((gma_crtc->lut_r[i] +
+				  gma_crtc->lut_adj[i]) << 16) |
+				  ((gma_crtc->lut_g[i] +
+				  gma_crtc->lut_adj[i]) << 8) |
+				  (gma_crtc->lut_b[i] +
+				  gma_crtc->lut_adj[i]);
+		}
+
+	}
+}
+
+void gma_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue,
+			u32 start, u32 size)
+{
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int i;
+	int end = (start + size > 256) ? 256 : start + size;
+
+	for (i = start; i < end; i++) {
+		gma_crtc->lut_r[i] = red[i] >> 8;
+		gma_crtc->lut_g[i] = green[i] >> 8;
+		gma_crtc->lut_b[i] = blue[i] >> 8;
+	}
+
+	gma_crtc_load_lut(crtc);
+}
+
+/**
+ * Sets the power management mode of the pipe and plane.
+ *
+ * This code should probably grow support for turning the cursor off and back
+ * on appropriately at the same time as we're turning the pipe off/on.
+ */
+void gma_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
+	const struct psb_offset *map = &dev_priv->regmap[pipe];
+	u32 temp;
+
+	/* XXX: When our outputs are all unaware of DPMS modes other than off
+	 * and on, we should map those modes to DRM_MODE_DPMS_OFF in the CRTC.
+	 */
+
+	if (IS_CDV(dev))
+		dev_priv->ops->disable_sr(dev);
+
+	switch (mode) {
+	case DRM_MODE_DPMS_ON:
+	case DRM_MODE_DPMS_STANDBY:
+	case DRM_MODE_DPMS_SUSPEND:
+		if (gma_crtc->active)
+			break;
+
+		gma_crtc->active = true;
+
+		/* Enable the DPLL */
+		temp = REG_READ(map->dpll);
+		if ((temp & DPLL_VCO_ENABLE) == 0) {
+			REG_WRITE(map->dpll, temp);
+			REG_READ(map->dpll);
+			/* Wait for the clocks to stabilize. */
+			udelay(150);
+			REG_WRITE(map->dpll, temp | DPLL_VCO_ENABLE);
+			REG_READ(map->dpll);
+			/* Wait for the clocks to stabilize. */
+			udelay(150);
+			REG_WRITE(map->dpll, temp | DPLL_VCO_ENABLE);
+			REG_READ(map->dpll);
+			/* Wait for the clocks to stabilize. */
+			udelay(150);
+		}
+
+		/* Enable the plane */
+		temp = REG_READ(map->cntr);
+		if ((temp & DISPLAY_PLANE_ENABLE) == 0) {
+			REG_WRITE(map->cntr,
+				  temp | DISPLAY_PLANE_ENABLE);
+			/* Flush the plane changes */
+			REG_WRITE(map->base, REG_READ(map->base));
+		}
+
+		udelay(150);
+
+		/* Enable the pipe */
+		temp = REG_READ(map->conf);
+		if ((temp & PIPEACONF_ENABLE) == 0)
+			REG_WRITE(map->conf, temp | PIPEACONF_ENABLE);
+
+		temp = REG_READ(map->status);
+		temp &= ~(0xFFFF);
+		temp |= PIPE_FIFO_UNDERRUN;
+		REG_WRITE(map->status, temp);
+		REG_READ(map->status);
+
+		gma_crtc_load_lut(crtc);
+
+		/* Give the overlay scaler a chance to enable
+		 * if it's on this pipe */
+		/* psb_intel_crtc_dpms_video(crtc, true); TODO */
+		break;
+	case DRM_MODE_DPMS_OFF:
+		if (!gma_crtc->active)
+			break;
+
+		gma_crtc->active = false;
+
+		/* Give the overlay scaler a chance to disable
+		 * if it's on this pipe */
+		/* psb_intel_crtc_dpms_video(crtc, FALSE); TODO */
+
+		/* Disable the VGA plane that we never use */
+		REG_WRITE(VGACNTRL, VGA_DISP_DISABLE);
+
+		/* Turn off vblank interrupts */
+		drm_vblank_off(dev, pipe);
+
+		/* Wait for vblank for the disable to take effect */
+		gma_wait_for_vblank(dev);
+
+		/* Disable plane */
+		temp = REG_READ(map->cntr);
+		if ((temp & DISPLAY_PLANE_ENABLE) != 0) {
+			REG_WRITE(map->cntr,
+				  temp & ~DISPLAY_PLANE_ENABLE);
+			/* Flush the plane changes */
+			REG_WRITE(map->base, REG_READ(map->base));
+			REG_READ(map->base);
+		}
+
+		/* Disable pipe */
+		temp = REG_READ(map->conf);
+		if ((temp & PIPEACONF_ENABLE) != 0) {
+			REG_WRITE(map->conf, temp & ~PIPEACONF_ENABLE);
+			REG_READ(map->conf);
+		}
+
+		/* Wait for vblank for the disable to take effect. */
+		gma_wait_for_vblank(dev);
+
+		udelay(150);
+
+		/* Disable DPLL */
+		temp = REG_READ(map->dpll);
+		if ((temp & DPLL_VCO_ENABLE) != 0) {
+			REG_WRITE(map->dpll, temp & ~DPLL_VCO_ENABLE);
+			REG_READ(map->dpll);
+		}
+
+		/* Wait for the clocks to turn off. */
+		udelay(150);
+		break;
+	}
+
+	if (IS_CDV(dev))
+		dev_priv->ops->update_wm(dev, crtc);
+
+	/* Set FIFO watermarks */
+	REG_WRITE(DSPARB, 0x3F3E);
+}
+
+int gma_crtc_cursor_set(struct drm_crtc *crtc,
+			struct drm_file *file_priv,
+			uint32_t handle,
+			uint32_t width, uint32_t height)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
+	uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
+	uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
+	uint32_t temp;
+	size_t addr = 0;
+	struct gtt_range *gt;
+	struct gtt_range *cursor_gt = gma_crtc->cursor_gt;
+	struct drm_gem_object *obj;
+	void *tmp_dst, *tmp_src;
+	int ret = 0, i, cursor_pages;
+
+	/* If we didn't get a handle then turn the cursor off */
+	if (!handle) {
+		temp = CURSOR_MODE_DISABLE;
+
+		if (gma_power_begin(dev, false)) {
+			REG_WRITE(control, temp);
+			REG_WRITE(base, 0);
+			gma_power_end(dev);
+		}
+
+		/* Unpin the old GEM object */
+		if (gma_crtc->cursor_obj) {
+			gt = container_of(gma_crtc->cursor_obj,
+					  struct gtt_range, gem);
+			psb_gtt_unpin(gt);
+			drm_gem_object_unreference(gma_crtc->cursor_obj);
+			gma_crtc->cursor_obj = NULL;
+		}
+
+		return 0;
+	}
+
+	/* Currently we only support 64x64 cursors */
+	if (width != 64 || height != 64) {
+		dev_dbg(dev->dev, "We currently only support 64x64 cursors\n");
+		return -EINVAL;
+	}
+
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (!obj)
+		return -ENOENT;
+
+	if (obj->size < width * height * 4) {
+		dev_dbg(dev->dev, "Buffer is too small\n");
+		ret = -ENOMEM;
+		goto unref_cursor;
+	}
+
+	gt = container_of(obj, struct gtt_range, gem);
+
+	/* Pin the memory into the GTT */
+	ret = psb_gtt_pin(gt);
+	if (ret) {
+		dev_err(dev->dev, "Can not pin down handle 0x%x\n", handle);
+		goto unref_cursor;
+	}
+
+	if (dev_priv->ops->cursor_needs_phys) {
+		if (cursor_gt == NULL) {
+			dev_err(dev->dev, "No hardware cursor mem available");
+			ret = -ENOMEM;
+			goto unref_cursor;
+		}
+
+		/* Prevent overflow */
+		if (gt->npage > 4)
+			cursor_pages = 4;
+		else
+			cursor_pages = gt->npage;
+
+		/* Copy the cursor to cursor mem */
+		tmp_dst = dev_priv->vram_addr + cursor_gt->offset;
+		for (i = 0; i < cursor_pages; i++) {
+			tmp_src = kmap(gt->pages[i]);
+			memcpy(tmp_dst, tmp_src, PAGE_SIZE);
+			kunmap(gt->pages[i]);
+			tmp_dst += PAGE_SIZE;
+		}
+
+		addr = gma_crtc->cursor_addr;
+	} else {
+		addr = gt->offset;
+		gma_crtc->cursor_addr = addr;
+	}
+
+	temp = 0;
+	/* set the pipe for the cursor */
+	temp |= (pipe << 28);
+	temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
+
+	if (gma_power_begin(dev, false)) {
+		REG_WRITE(control, temp);
+		REG_WRITE(base, addr);
+		gma_power_end(dev);
+	}
+
+	/* unpin the old bo */
+	if (gma_crtc->cursor_obj) {
+		gt = container_of(gma_crtc->cursor_obj, struct gtt_range, gem);
+		psb_gtt_unpin(gt);
+		drm_gem_object_unreference(gma_crtc->cursor_obj);
+	}
+
+	gma_crtc->cursor_obj = obj;
+	return ret;
+
+unref_cursor:
+	drm_gem_object_unreference(obj);
+	return ret;
+}
+
+int gma_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
+{
+	struct drm_device *dev = crtc->dev;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
+	uint32_t temp = 0;
+	uint32_t addr;
+
+	if (x < 0) {
+		temp |= (CURSOR_POS_SIGN << CURSOR_X_SHIFT);
+		x = -x;
+	}
+	if (y < 0) {
+		temp |= (CURSOR_POS_SIGN << CURSOR_Y_SHIFT);
+		y = -y;
+	}
+
+	temp |= ((x & CURSOR_POS_MASK) << CURSOR_X_SHIFT);
+	temp |= ((y & CURSOR_POS_MASK) << CURSOR_Y_SHIFT);
+
+	addr = gma_crtc->cursor_addr;
+
+	if (gma_power_begin(dev, false)) {
+		REG_WRITE((pipe == 0) ? CURAPOS : CURBPOS, temp);
+		REG_WRITE((pipe == 0) ? CURABASE : CURBBASE, addr);
+		gma_power_end(dev);
+	}
+	return 0;
+}
+
+bool gma_crtc_mode_fixup(struct drm_crtc *crtc,
+			 const struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+void gma_crtc_prepare(struct drm_crtc *crtc)
+{
+	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+void gma_crtc_commit(struct drm_crtc *crtc)
+{
+	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
+}
+
+void gma_crtc_disable(struct drm_crtc *crtc)
+{
+	struct gtt_range *gt;
+	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+
+	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
+
+	if (crtc->fb) {
+		gt = to_psb_fb(crtc->fb)->gtt;
+		psb_gtt_unpin(gt);
+	}
+}
+
+void gma_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+
+	kfree(gma_crtc->crtc_state);
+	drm_crtc_cleanup(crtc);
+	kfree(gma_crtc);
+}
+
+int gma_crtc_set_config(struct drm_mode_set *set)
+{
+	struct drm_device *dev = set->crtc->dev;
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	int ret;
+
+	if (!dev_priv->rpm_enabled)
+		return drm_crtc_helper_set_config(set);
+
+	pm_runtime_forbid(&dev->pdev->dev);
+	ret = drm_crtc_helper_set_config(set);
+	pm_runtime_allow(&dev->pdev->dev);
+
+	return ret;
+}
+
+/**
+ * Save HW states of given crtc
+ */
+void gma_crtc_save(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	struct psb_intel_crtc_state *crtc_state = gma_crtc->crtc_state;
+	const struct psb_offset *map = &dev_priv->regmap[gma_crtc->pipe];
+	uint32_t palette_reg;
+	int i;
+
+	if (!crtc_state) {
+		dev_err(dev->dev, "No CRTC state found\n");
+		return;
+	}
+
+	crtc_state->saveDSPCNTR = REG_READ(map->cntr);
+	crtc_state->savePIPECONF = REG_READ(map->conf);
+	crtc_state->savePIPESRC = REG_READ(map->src);
+	crtc_state->saveFP0 = REG_READ(map->fp0);
+	crtc_state->saveFP1 = REG_READ(map->fp1);
+	crtc_state->saveDPLL = REG_READ(map->dpll);
+	crtc_state->saveHTOTAL = REG_READ(map->htotal);
+	crtc_state->saveHBLANK = REG_READ(map->hblank);
+	crtc_state->saveHSYNC = REG_READ(map->hsync);
+	crtc_state->saveVTOTAL = REG_READ(map->vtotal);
+	crtc_state->saveVBLANK = REG_READ(map->vblank);
+	crtc_state->saveVSYNC = REG_READ(map->vsync);
+	crtc_state->saveDSPSTRIDE = REG_READ(map->stride);
+
+	/* NOTE: DSPSIZE DSPPOS only for psb */
+	crtc_state->saveDSPSIZE = REG_READ(map->size);
+	crtc_state->saveDSPPOS = REG_READ(map->pos);
+
+	crtc_state->saveDSPBASE = REG_READ(map->base);
+
+	palette_reg = map->palette;
+	for (i = 0; i < 256; ++i)
+		crtc_state->savePalette[i] = REG_READ(palette_reg + (i << 2));
+}
+
+/**
+ * Restore HW states of given crtc
+ */
+void gma_crtc_restore(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_psb_private *dev_priv = dev->dev_private;
+	struct gma_crtc *gma_crtc =  to_gma_crtc(crtc);
+	struct psb_intel_crtc_state *crtc_state = gma_crtc->crtc_state;
+	const struct psb_offset *map = &dev_priv->regmap[gma_crtc->pipe];
+	uint32_t palette_reg;
+	int i;
+
+	if (!crtc_state) {
+		dev_err(dev->dev, "No crtc state\n");
+		return;
+	}
+
+	if (crtc_state->saveDPLL & DPLL_VCO_ENABLE) {
+		REG_WRITE(map->dpll,
+			crtc_state->saveDPLL & ~DPLL_VCO_ENABLE);
+		REG_READ(map->dpll);
+		udelay(150);
+	}
+
+	REG_WRITE(map->fp0, crtc_state->saveFP0);
+	REG_READ(map->fp0);
+
+	REG_WRITE(map->fp1, crtc_state->saveFP1);
+	REG_READ(map->fp1);
+
+	REG_WRITE(map->dpll, crtc_state->saveDPLL);
+	REG_READ(map->dpll);
+	udelay(150);
+
+	REG_WRITE(map->htotal, crtc_state->saveHTOTAL);
+	REG_WRITE(map->hblank, crtc_state->saveHBLANK);
+	REG_WRITE(map->hsync, crtc_state->saveHSYNC);
+	REG_WRITE(map->vtotal, crtc_state->saveVTOTAL);
+	REG_WRITE(map->vblank, crtc_state->saveVBLANK);
+	REG_WRITE(map->vsync, crtc_state->saveVSYNC);
+	REG_WRITE(map->stride, crtc_state->saveDSPSTRIDE);
+
+	REG_WRITE(map->size, crtc_state->saveDSPSIZE);
+	REG_WRITE(map->pos, crtc_state->saveDSPPOS);
+
+	REG_WRITE(map->src, crtc_state->savePIPESRC);
+	REG_WRITE(map->base, crtc_state->saveDSPBASE);
+	REG_WRITE(map->conf, crtc_state->savePIPECONF);
+
+	gma_wait_for_vblank(dev);
+
+	REG_WRITE(map->cntr, crtc_state->saveDSPCNTR);
+	REG_WRITE(map->base, crtc_state->saveDSPBASE);
+
+	gma_wait_for_vblank(dev);
+
+	palette_reg = map->palette;
+	for (i = 0; i < 256; ++i)
+		REG_WRITE(palette_reg + (i << 2), crtc_state->savePalette[i]);
+}
+
+void gma_encoder_prepare(struct drm_encoder *encoder)
+{
+	struct drm_encoder_helper_funcs *encoder_funcs =
+	    encoder->helper_private;
+	/* lvds has its own version of prepare see psb_intel_lvds_prepare */
+	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+void gma_encoder_commit(struct drm_encoder *encoder)
+{
+	struct drm_encoder_helper_funcs *encoder_funcs =
+	    encoder->helper_private;
+	/* lvds has its own version of commit see psb_intel_lvds_commit */
+	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON);
+}
+
+void gma_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct gma_encoder *intel_encoder = to_gma_encoder(encoder);
+
+	drm_encoder_cleanup(encoder);
+	kfree(intel_encoder);
+}
+
+/* Currently there is only a 1:1 mapping of encoders and connectors */
+struct drm_encoder *gma_best_encoder(struct drm_connector *connector)
+{
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+
+	return &gma_encoder->base;
+}
+
+void gma_connector_attach_encoder(struct gma_connector *connector,
+				  struct gma_encoder *encoder)
+{
+	connector->encoder = encoder;
+	drm_mode_connector_attach_encoder(&connector->base,
+					  &encoder->base);
+}
+
+#define GMA_PLL_INVALID(s) { /* DRM_ERROR(s); */ return false; }
+
+bool gma_pll_is_valid(struct drm_crtc *crtc,
+		      const struct gma_limit_t *limit,
+		      struct gma_clock_t *clock)
+{
+	if (clock->p1 < limit->p1.min || limit->p1.max < clock->p1)
+		GMA_PLL_INVALID("p1 out of range");
+	if (clock->p < limit->p.min || limit->p.max < clock->p)
+		GMA_PLL_INVALID("p out of range");
+	if (clock->m2 < limit->m2.min || limit->m2.max < clock->m2)
+		GMA_PLL_INVALID("m2 out of range");
+	if (clock->m1 < limit->m1.min || limit->m1.max < clock->m1)
+		GMA_PLL_INVALID("m1 out of range");
+	/* On CDV m1 is always 0 */
+	if (clock->m1 <= clock->m2 && clock->m1 != 0)
+		GMA_PLL_INVALID("m1 <= m2 && m1 != 0");
+	if (clock->m < limit->m.min || limit->m.max < clock->m)
+		GMA_PLL_INVALID("m out of range");
+	if (clock->n < limit->n.min || limit->n.max < clock->n)
+		GMA_PLL_INVALID("n out of range");
+	if (clock->vco < limit->vco.min || limit->vco.max < clock->vco)
+		GMA_PLL_INVALID("vco out of range");
+	/* XXX: We may need to be checking "Dot clock"
+	 * depending on the multiplier, connector, etc.,
+	 * rather than just a single range.
+	 */
+	if (clock->dot < limit->dot.min || limit->dot.max < clock->dot)
+		GMA_PLL_INVALID("dot out of range");
+
+	return true;
+}
+
+bool gma_find_best_pll(const struct gma_limit_t *limit,
+		       struct drm_crtc *crtc, int target, int refclk,
+		       struct gma_clock_t *best_clock)
+{
+	struct drm_device *dev = crtc->dev;
+	const struct gma_clock_funcs *clock_funcs =
+						to_gma_crtc(crtc)->clock_funcs;
+	struct gma_clock_t clock;
+	int err = target;
+
+	if (gma_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) &&
+	    (REG_READ(LVDS) & LVDS_PORT_EN) != 0) {
+		/*
+		 * For LVDS, if the panel is on, just rely on its current
+		 * settings for dual-channel.  We haven't figured out how to
+		 * reliably set up different single/dual channel state, if we
+		 * even can.
+		 */
+		if ((REG_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
+		    LVDS_CLKB_POWER_UP)
+			clock.p2 = limit->p2.p2_fast;
+		else
+			clock.p2 = limit->p2.p2_slow;
+	} else {
+		if (target < limit->p2.dot_limit)
+			clock.p2 = limit->p2.p2_slow;
+		else
+			clock.p2 = limit->p2.p2_fast;
+	}
+
+	memset(best_clock, 0, sizeof(*best_clock));
+
+	/* m1 is always 0 on CDV so the outmost loop will run just once */
+	for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) {
+		for (clock.m2 = limit->m2.min;
+		     (clock.m2 < clock.m1 || clock.m1 == 0) &&
+		      clock.m2 <= limit->m2.max; clock.m2++) {
+			for (clock.n = limit->n.min;
+			     clock.n <= limit->n.max; clock.n++) {
+				for (clock.p1 = limit->p1.min;
+				     clock.p1 <= limit->p1.max;
+				     clock.p1++) {
+					int this_err;
+
+					clock_funcs->clock(refclk, &clock);
+
+					if (!clock_funcs->pll_is_valid(crtc,
+								limit, &clock))
+						continue;
+
+					this_err = abs(clock.dot - target);
+					if (this_err < err) {
+						*best_clock = clock;
+						err = this_err;
+					}
+				}
+			}
+		}
+	}
+
+	return err != target;
+}
diff --git a/drivers/gpu/drm/gma500/gma_display.h b/drivers/gpu/drm/gma500/gma_display.h
new file mode 100644
index 0000000..78b9f98
--- /dev/null
+++ b/drivers/gpu/drm/gma500/gma_display.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright © 2006-2011 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Authors:
+ *	Eric Anholt <eric@anholt.net>
+ *	Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
+ */
+
+#ifndef _GMA_DISPLAY_H_
+#define _GMA_DISPLAY_H_
+
+#include <linux/pm_runtime.h>
+
+struct gma_clock_t {
+	/* given values */
+	int n;
+	int m1, m2;
+	int p1, p2;
+	/* derived values */
+	int dot;
+	int vco;
+	int m;
+	int p;
+};
+
+struct gma_range_t {
+	int min, max;
+};
+
+struct gma_p2_t {
+	int dot_limit;
+	int p2_slow, p2_fast;
+};
+
+struct gma_limit_t {
+	struct gma_range_t dot, vco, n, m, m1, m2, p, p1;
+	struct gma_p2_t p2;
+	bool (*find_pll)(const struct gma_limit_t *, struct drm_crtc *,
+			 int target, int refclk,
+			 struct gma_clock_t *best_clock);
+};
+
+struct gma_clock_funcs {
+	void (*clock)(int refclk, struct gma_clock_t *clock);
+	const struct gma_limit_t *(*limit)(struct drm_crtc *crtc, int refclk);
+	bool (*pll_is_valid)(struct drm_crtc *crtc,
+			     const struct gma_limit_t *limit,
+			     struct gma_clock_t *clock);
+};
+
+/* Common pipe related functions */
+extern bool gma_pipe_has_type(struct drm_crtc *crtc, int type);
+extern void gma_wait_for_vblank(struct drm_device *dev);
+extern int gma_pipe_set_base(struct drm_crtc *crtc, int x, int y,
+			     struct drm_framebuffer *old_fb);
+extern int gma_crtc_cursor_set(struct drm_crtc *crtc,
+			       struct drm_file *file_priv,
+			       uint32_t handle,
+			       uint32_t width, uint32_t height);
+extern int gma_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
+extern void gma_crtc_load_lut(struct drm_crtc *crtc);
+extern void gma_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+			       u16 *blue, u32 start, u32 size);
+extern void gma_crtc_dpms(struct drm_crtc *crtc, int mode);
+extern bool gma_crtc_mode_fixup(struct drm_crtc *crtc,
+				const struct drm_display_mode *mode,
+				struct drm_display_mode *adjusted_mode);
+extern void gma_crtc_prepare(struct drm_crtc *crtc);
+extern void gma_crtc_commit(struct drm_crtc *crtc);
+extern void gma_crtc_disable(struct drm_crtc *crtc);
+extern void gma_crtc_destroy(struct drm_crtc *crtc);
+extern int gma_crtc_set_config(struct drm_mode_set *set);
+
+extern void gma_crtc_save(struct drm_crtc *crtc);
+extern void gma_crtc_restore(struct drm_crtc *crtc);
+
+extern void gma_encoder_prepare(struct drm_encoder *encoder);
+extern void gma_encoder_commit(struct drm_encoder *encoder);
+extern void gma_encoder_destroy(struct drm_encoder *encoder);
+
+/* Common clock related functions */
+extern const struct gma_limit_t *gma_limit(struct drm_crtc *crtc, int refclk);
+extern void gma_clock(int refclk, struct gma_clock_t *clock);
+extern bool gma_pll_is_valid(struct drm_crtc *crtc,
+			     const struct gma_limit_t *limit,
+			     struct gma_clock_t *clock);
+extern bool gma_find_best_pll(const struct gma_limit_t *limit,
+			      struct drm_crtc *crtc, int target, int refclk,
+			      struct gma_clock_t *best_clock);
+#endif
diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c
index 1f82183..92babac 100644
--- a/drivers/gpu/drm/gma500/gtt.c
+++ b/drivers/gpu/drm/gma500/gtt.c
@@ -196,37 +196,17 @@
  */
 static int psb_gtt_attach_pages(struct gtt_range *gt)
 {
-	struct inode *inode;
-	struct address_space *mapping;
-	int i;
-	struct page *p;
-	int pages = gt->gem.size / PAGE_SIZE;
+	struct page **pages;
 
 	WARN_ON(gt->pages);
 
-	/* This is the shared memory object that backs the GEM resource */
-	inode = file_inode(gt->gem.filp);
-	mapping = inode->i_mapping;
+	pages = drm_gem_get_pages(&gt->gem, 0);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
 
-	gt->pages = kmalloc(pages * sizeof(struct page *), GFP_KERNEL);
-	if (gt->pages == NULL)
-		return -ENOMEM;
-	gt->npage = pages;
+	gt->pages = pages;
 
-	for (i = 0; i < pages; i++) {
-		p = shmem_read_mapping_page(mapping, i);
-		if (IS_ERR(p))
-			goto err;
-		gt->pages[i] = p;
-	}
 	return 0;
-
-err:
-	while (i--)
-		page_cache_release(gt->pages[i]);
-	kfree(gt->pages);
-	gt->pages = NULL;
-	return PTR_ERR(p);
 }
 
 /**
@@ -240,13 +220,7 @@
  */
 static void psb_gtt_detach_pages(struct gtt_range *gt)
 {
-	int i;
-	for (i = 0; i < gt->npage; i++) {
-		/* FIXME: do we need to force dirty */
-		set_page_dirty(gt->pages[i]);
-		page_cache_release(gt->pages[i]);
-	}
-	kfree(gt->pages);
+	drm_gem_put_pages(&gt->gem, gt->pages, true, false);
 	gt->pages = NULL;
 }
 
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
index 3abf831..860a4ee 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c
@@ -249,12 +249,11 @@
 	struct drm_encoder *encoder = connector->encoder;
 
 	if (!strcmp(property->name, "scaling mode") && encoder) {
-		struct psb_intel_crtc *psb_crtc =
-					to_psb_intel_crtc(encoder->crtc);
+		struct gma_crtc *gma_crtc = to_gma_crtc(encoder->crtc);
 		bool centerechange;
 		uint64_t val;
 
-		if (!psb_crtc)
+		if (!gma_crtc)
 			goto set_prop_error;
 
 		switch (value) {
@@ -281,11 +280,11 @@
 		centerechange = (val == DRM_MODE_SCALE_NO_SCALE) ||
 			(value == DRM_MODE_SCALE_NO_SCALE);
 
-		if (psb_crtc->saved_mode.hdisplay != 0 &&
-		    psb_crtc->saved_mode.vdisplay != 0) {
+		if (gma_crtc->saved_mode.hdisplay != 0 &&
+		    gma_crtc->saved_mode.vdisplay != 0) {
 			if (centerechange) {
 				if (!drm_crtc_helper_set_mode(encoder->crtc,
-						&psb_crtc->saved_mode,
+						&gma_crtc->saved_mode,
 						encoder->crtc->x,
 						encoder->crtc->y,
 						encoder->crtc->fb))
@@ -294,8 +293,8 @@
 				struct drm_encoder_helper_funcs *funcs =
 						encoder->helper_private;
 				funcs->mode_set(encoder,
-					&psb_crtc->saved_mode,
-					&psb_crtc->saved_adjusted_mode);
+					&gma_crtc->saved_mode,
+					&gma_crtc->saved_adjusted_mode);
 			}
 		}
 	} else if (!strcmp(property->name, "backlight") && encoder) {
diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.h b/drivers/gpu/drm/gma500/mdfld_dsi_output.h
index 36eb074..45d5af0 100644
--- a/drivers/gpu/drm/gma500/mdfld_dsi_output.h
+++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.h
@@ -227,7 +227,7 @@
 #define DSI_DPI_DISABLE_BTA					BIT(3)
 
 struct mdfld_dsi_connector {
-	struct psb_intel_connector base;
+	struct gma_connector base;
 
 	int pipe;
 	void *private;
@@ -238,7 +238,7 @@
 };
 
 struct mdfld_dsi_encoder {
-	struct psb_intel_encoder base;
+	struct gma_encoder base;
 	void *private;
 };
 
@@ -269,21 +269,21 @@
 static inline struct mdfld_dsi_connector *mdfld_dsi_connector(
 		struct drm_connector *connector)
 {
-	struct psb_intel_connector *psb_connector;
+	struct gma_connector *gma_connector;
 
-	psb_connector = to_psb_intel_connector(connector);
+	gma_connector = to_gma_connector(connector);
 
-	return container_of(psb_connector, struct mdfld_dsi_connector, base);
+	return container_of(gma_connector, struct mdfld_dsi_connector, base);
 }
 
 static inline struct mdfld_dsi_encoder *mdfld_dsi_encoder(
 		struct drm_encoder *encoder)
 {
-	struct psb_intel_encoder *psb_encoder;
+	struct gma_encoder *gma_encoder;
 
-	psb_encoder = to_psb_intel_encoder(encoder);
+	gma_encoder = to_gma_encoder(encoder);
 
-	return container_of(psb_encoder, struct mdfld_dsi_encoder, base);
+	return container_of(gma_encoder, struct mdfld_dsi_encoder, base);
 }
 
 static inline struct mdfld_dsi_config *
diff --git a/drivers/gpu/drm/gma500/mdfld_intel_display.c b/drivers/gpu/drm/gma500/mdfld_intel_display.c
index 74485dc..321c00a 100644
--- a/drivers/gpu/drm/gma500/mdfld_intel_display.c
+++ b/drivers/gpu/drm/gma500/mdfld_intel_display.c
@@ -23,7 +23,7 @@
 
 #include <drm/drmP.h>
 #include "psb_intel_reg.h"
-#include "psb_intel_display.h"
+#include "gma_display.h"
 #include "framebuffer.h"
 #include "mdfld_output.h"
 #include "mdfld_dsi_output.h"
@@ -65,7 +65,7 @@
 	}
 
 	/* FIXME JLIU7_PO */
-	psb_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 	return;
 
 	/* Wait for for the pipe disable to take effect. */
@@ -93,7 +93,7 @@
 	}
 
 	/* FIXME JLIU7_PO */
-	psb_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 	return;
 
 	/* Wait for for the pipe enable to take effect. */
@@ -104,25 +104,6 @@
 	}
 }
 
-static void psb_intel_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void psb_intel_crtc_commit(struct drm_crtc *crtc)
-{
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
-}
-
-static bool psb_intel_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 /**
  * Return the pipe currently connected to the panel fitter,
  * or -1 if the panel fitter is not present or not in use
@@ -184,9 +165,9 @@
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct psb_framebuffer *psbfb = to_psb_fb(crtc->fb);
-	int pipe = psb_intel_crtc->pipe;
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	unsigned long start, offset;
 	u32 dspcntr;
@@ -324,8 +305,8 @@
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	u32 pipeconf = dev_priv->pipeconf[pipe];
 	u32 temp;
@@ -436,7 +417,7 @@
 			}
 		}
 
-		psb_intel_crtc_load_lut(crtc);
+		gma_crtc_load_lut(crtc);
 
 		/* Give the overlay scaler a chance to enable
 		   if it's on this pipe */
@@ -611,8 +592,8 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 
-	if (psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_MIPI)
-	    || psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_MIPI2)) {
+	if (gma_pipe_has_type(crtc, INTEL_OUTPUT_MIPI)
+	    || gma_pipe_has_type(crtc, INTEL_OUTPUT_MIPI2)) {
 		if ((ksel == KSEL_CRYSTAL_19) || (ksel == KSEL_BYPASS_19))
 			limit = &mdfld_limits[MDFLD_LIMT_DSIPLL_19];
 		else if (ksel == KSEL_BYPASS_25)
@@ -624,7 +605,7 @@
 			 (dev_priv->core_freq == 100 ||
 				dev_priv->core_freq == 200))
 			limit = &mdfld_limits[MDFLD_LIMT_DSIPLL_100];
-	} else if (psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_HDMI)) {
+	} else if (gma_pipe_has_type(crtc, INTEL_OUTPUT_HDMI)) {
 		if ((ksel == KSEL_CRYSTAL_19) || (ksel == KSEL_BYPASS_19))
 			limit = &mdfld_limits[MDFLD_LIMT_DPLL_19];
 		else if (ksel == KSEL_BYPASS_25)
@@ -688,9 +669,9 @@
 			      struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	int pipe = psb_intel_crtc->pipe;
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	int refclk = 0;
 	int clk_n = 0, clk_p2 = 0, clk_byte = 1, clk = 0, m_conv = 0,
@@ -700,7 +681,7 @@
 	u32 dpll = 0, fp = 0;
 	bool is_mipi = false, is_mipi2 = false, is_hdmi = false;
 	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct psb_intel_encoder *psb_intel_encoder = NULL;
+	struct gma_encoder *gma_encoder = NULL;
 	uint64_t scalingType = DRM_MODE_SCALE_FULLSCREEN;
 	struct drm_encoder *encoder;
 	struct drm_connector *connector;
@@ -749,9 +730,9 @@
 	if (!gma_power_begin(dev, true))
 		return 0;
 
-	memcpy(&psb_intel_crtc->saved_mode, mode,
+	memcpy(&gma_crtc->saved_mode, mode,
 					sizeof(struct drm_display_mode));
-	memcpy(&psb_intel_crtc->saved_adjusted_mode, adjusted_mode,
+	memcpy(&gma_crtc->saved_adjusted_mode, adjusted_mode,
 					sizeof(struct drm_display_mode));
 
 	list_for_each_entry(connector, &mode_config->connector_list, head) {
@@ -766,9 +747,9 @@
 		if (encoder->crtc != crtc)
 			continue;
 
-		psb_intel_encoder = psb_intel_attached_encoder(connector);
+		gma_encoder = gma_attached_encoder(connector);
 
-		switch (psb_intel_encoder->type) {
+		switch (gma_encoder->type) {
 		case INTEL_OUTPUT_MIPI:
 			is_mipi = true;
 			break;
@@ -819,7 +800,7 @@
 
 	REG_WRITE(map->pos, 0);
 
-	if (psb_intel_encoder)
+	if (gma_encoder)
 		drm_object_property_get_value(&connector->base,
 			dev->mode_config.scaling_mode_property, &scalingType);
 
@@ -1034,7 +1015,7 @@
 
 	/* Wait for for the pipe enable to take effect. */
 	REG_WRITE(map->cntr, dev_priv->dspcntr[pipe]);
-	psb_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 mrst_crtc_mode_set_exit:
 
@@ -1045,10 +1026,10 @@
 
 const struct drm_crtc_helper_funcs mdfld_helper_funcs = {
 	.dpms = mdfld_crtc_dpms,
-	.mode_fixup = psb_intel_crtc_mode_fixup,
+	.mode_fixup = gma_crtc_mode_fixup,
 	.mode_set = mdfld_crtc_mode_set,
 	.mode_set_base = mdfld__intel_pipe_set_base,
-	.prepare = psb_intel_crtc_prepare,
-	.commit = psb_intel_crtc_commit,
+	.prepare = gma_crtc_prepare,
+	.commit = gma_crtc_commit,
 };
 
diff --git a/drivers/gpu/drm/gma500/oaktrail_crtc.c b/drivers/gpu/drm/gma500/oaktrail_crtc.c
index 3071526..54c9896 100644
--- a/drivers/gpu/drm/gma500/oaktrail_crtc.c
+++ b/drivers/gpu/drm/gma500/oaktrail_crtc.c
@@ -23,7 +23,7 @@
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "psb_intel_display.h"
+#include "gma_display.h"
 #include "power.h"
 
 struct psb_intel_range_t {
@@ -88,8 +88,8 @@
 	struct drm_device *dev = crtc->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 
-	if (psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)
-	    || psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_MIPI)) {
+	if (gma_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)
+	    || gma_pipe_has_type(crtc, INTEL_OUTPUT_MIPI)) {
 		switch (dev_priv->core_freq) {
 		case 100:
 			limit = &oaktrail_limits[MRST_LIMIT_LVDS_100L];
@@ -163,8 +163,8 @@
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	u32 temp;
 
@@ -212,7 +212,7 @@
 			REG_WRITE(map->base, REG_READ(map->base));
 		}
 
-		psb_intel_crtc_load_lut(crtc);
+		gma_crtc_load_lut(crtc);
 
 		/* Give the overlay scaler a chance to enable
 		   if it's on this pipe */
@@ -242,7 +242,7 @@
 			REG_READ(map->conf);
 		}
 		/* Wait for for the pipe disable to take effect. */
-		psb_intel_wait_for_vblank(dev);
+		gma_wait_for_vblank(dev);
 
 		temp = REG_READ(map->dpll);
 		if ((temp & DPLL_VCO_ENABLE) != 0) {
@@ -292,9 +292,9 @@
 			      struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	int pipe = psb_intel_crtc->pipe;
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	int refclk = 0;
 	struct oaktrail_clock_t clock;
@@ -303,7 +303,7 @@
 	bool is_lvds = false;
 	bool is_mipi = false;
 	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct psb_intel_encoder *psb_intel_encoder = NULL;
+	struct gma_encoder *gma_encoder = NULL;
 	uint64_t scalingType = DRM_MODE_SCALE_FULLSCREEN;
 	struct drm_connector *connector;
 
@@ -313,10 +313,10 @@
 	if (!gma_power_begin(dev, true))
 		return 0;
 
-	memcpy(&psb_intel_crtc->saved_mode,
+	memcpy(&gma_crtc->saved_mode,
 		mode,
 		sizeof(struct drm_display_mode));
-	memcpy(&psb_intel_crtc->saved_adjusted_mode,
+	memcpy(&gma_crtc->saved_adjusted_mode,
 		adjusted_mode,
 		sizeof(struct drm_display_mode));
 
@@ -324,9 +324,9 @@
 		if (!connector->encoder || connector->encoder->crtc != crtc)
 			continue;
 
-		psb_intel_encoder = psb_intel_attached_encoder(connector);
+		gma_encoder = gma_attached_encoder(connector);
 
-		switch (psb_intel_encoder->type) {
+		switch (gma_encoder->type) {
 		case INTEL_OUTPUT_LVDS:
 			is_lvds = true;
 			break;
@@ -350,7 +350,7 @@
 		  ((mode->crtc_hdisplay - 1) << 16) |
 		  (mode->crtc_vdisplay - 1));
 
-	if (psb_intel_encoder)
+	if (gma_encoder)
 		drm_object_property_get_value(&connector->base,
 			dev->mode_config.scaling_mode_property, &scalingType);
 
@@ -484,31 +484,24 @@
 
 	REG_WRITE(map->conf, pipeconf);
 	REG_READ(map->conf);
-	psb_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 	REG_WRITE(map->cntr, dspcntr);
-	psb_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 oaktrail_crtc_mode_set_exit:
 	gma_power_end(dev);
 	return 0;
 }
 
-static bool oaktrail_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
 static int oaktrail_pipe_set_base(struct drm_crtc *crtc,
 			    int x, int y, struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct psb_framebuffer *psbfb = to_psb_fb(crtc->fb);
-	int pipe = psb_intel_crtc->pipe;
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	unsigned long start, offset;
 
@@ -563,24 +556,12 @@
 	return ret;
 }
 
-static void oaktrail_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void oaktrail_crtc_commit(struct drm_crtc *crtc)
-{
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
-}
-
 const struct drm_crtc_helper_funcs oaktrail_helper_funcs = {
 	.dpms = oaktrail_crtc_dpms,
-	.mode_fixup = oaktrail_crtc_mode_fixup,
+	.mode_fixup = gma_crtc_mode_fixup,
 	.mode_set = oaktrail_crtc_mode_set,
 	.mode_set_base = oaktrail_pipe_set_base,
-	.prepare = oaktrail_crtc_prepare,
-	.commit = oaktrail_crtc_commit,
+	.prepare = gma_crtc_prepare,
+	.commit = gma_crtc_commit,
 };
 
diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
index f036f1f..3815314 100644
--- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c
+++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c
@@ -155,12 +155,6 @@
 	HDMI_READ(HDMI_HCR);
 }
 
-static void wait_for_vblank(struct drm_device *dev)
-{
-	/* Wait for 20ms, i.e. one cycle at 50hz. */
-	mdelay(20);
-}
-
 static unsigned int htotal_calculate(struct drm_display_mode *mode)
 {
 	u32 htotal, new_crtc_htotal;
@@ -372,10 +366,10 @@
 
 	REG_WRITE(PCH_PIPEBCONF, pipeconf);
 	REG_READ(PCH_PIPEBCONF);
-	wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 	REG_WRITE(dspcntr_reg, dspcntr);
-	wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 	gma_power_end(dev);
 
@@ -459,7 +453,7 @@
 			REG_READ(PCH_PIPEBCONF);
 		}
 
-		wait_for_vblank(dev);
+		gma_wait_for_vblank(dev);
 
 		/* Enable plane */
 		temp = REG_READ(DSPBCNTR);
@@ -470,7 +464,7 @@
 			REG_READ(DSPBSURF);
 		}
 
-		psb_intel_crtc_load_lut(crtc);
+		gma_crtc_load_lut(crtc);
 	}
 
 	/* DSPARB */
@@ -615,16 +609,16 @@
 static const struct drm_encoder_helper_funcs oaktrail_hdmi_helper_funcs = {
 	.dpms = oaktrail_hdmi_dpms,
 	.mode_fixup = oaktrail_hdmi_mode_fixup,
-	.prepare = psb_intel_encoder_prepare,
+	.prepare = gma_encoder_prepare,
 	.mode_set = oaktrail_hdmi_mode_set,
-	.commit = psb_intel_encoder_commit,
+	.commit = gma_encoder_commit,
 };
 
 static const struct drm_connector_helper_funcs
 					oaktrail_hdmi_connector_helper_funcs = {
 	.get_modes = oaktrail_hdmi_get_modes,
 	.mode_valid = oaktrail_hdmi_mode_valid,
-	.best_encoder = psb_intel_best_encoder,
+	.best_encoder = gma_best_encoder,
 };
 
 static const struct drm_connector_funcs oaktrail_hdmi_connector_funcs = {
@@ -646,21 +640,21 @@
 void oaktrail_hdmi_init(struct drm_device *dev,
 					struct psb_intel_mode_device *mode_dev)
 {
-	struct psb_intel_encoder *psb_intel_encoder;
-	struct psb_intel_connector *psb_intel_connector;
+	struct gma_encoder *gma_encoder;
+	struct gma_connector *gma_connector;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
 
-	psb_intel_encoder = kzalloc(sizeof(struct psb_intel_encoder), GFP_KERNEL);
-	if (!psb_intel_encoder)
+	gma_encoder = kzalloc(sizeof(struct gma_encoder), GFP_KERNEL);
+	if (!gma_encoder)
 		return;
 
-	psb_intel_connector = kzalloc(sizeof(struct psb_intel_connector), GFP_KERNEL);
-	if (!psb_intel_connector)
+	gma_connector = kzalloc(sizeof(struct gma_connector), GFP_KERNEL);
+	if (!gma_connector)
 		goto failed_connector;
 
-	connector = &psb_intel_connector->base;
-	encoder = &psb_intel_encoder->base;
+	connector = &gma_connector->base;
+	encoder = &gma_encoder->base;
 	drm_connector_init(dev, connector,
 			   &oaktrail_hdmi_connector_funcs,
 			   DRM_MODE_CONNECTOR_DVID);
@@ -669,10 +663,9 @@
 			 &oaktrail_hdmi_enc_funcs,
 			 DRM_MODE_ENCODER_TMDS);
 
-	psb_intel_connector_attach_encoder(psb_intel_connector,
-					   psb_intel_encoder);
+	gma_connector_attach_encoder(gma_connector, gma_encoder);
 
-	psb_intel_encoder->type = INTEL_OUTPUT_HDMI;
+	gma_encoder->type = INTEL_OUTPUT_HDMI;
 	drm_encoder_helper_add(encoder, &oaktrail_hdmi_helper_funcs);
 	drm_connector_helper_add(connector, &oaktrail_hdmi_connector_helper_funcs);
 
@@ -685,7 +678,7 @@
 	return;
 
 failed_connector:
-	kfree(psb_intel_encoder);
+	kfree(gma_encoder);
 }
 
 static DEFINE_PCI_DEVICE_TABLE(hdmi_ids) = {
diff --git a/drivers/gpu/drm/gma500/oaktrail_lvds.c b/drivers/gpu/drm/gma500/oaktrail_lvds.c
index 325013a..e77d721 100644
--- a/drivers/gpu/drm/gma500/oaktrail_lvds.c
+++ b/drivers/gpu/drm/gma500/oaktrail_lvds.c
@@ -43,7 +43,7 @@
  * Sets the power state for the panel.
  */
 static void oaktrail_lvds_set_power(struct drm_device *dev,
-				struct psb_intel_encoder *psb_intel_encoder,
+				struct gma_encoder *gma_encoder,
 				bool on)
 {
 	u32 pp_status;
@@ -78,13 +78,12 @@
 static void oaktrail_lvds_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
-	struct psb_intel_encoder *psb_intel_encoder =
-						to_psb_intel_encoder(encoder);
+	struct gma_encoder *gma_encoder = to_gma_encoder(encoder);
 
 	if (mode == DRM_MODE_DPMS_ON)
-		oaktrail_lvds_set_power(dev, psb_intel_encoder, true);
+		oaktrail_lvds_set_power(dev, gma_encoder, true);
 	else
-		oaktrail_lvds_set_power(dev, psb_intel_encoder, false);
+		oaktrail_lvds_set_power(dev, gma_encoder, false);
 
 	/* XXX: We never power down the LVDS pairs. */
 }
@@ -166,8 +165,7 @@
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_encoder *psb_intel_encoder =
-						to_psb_intel_encoder(encoder);
+	struct gma_encoder *gma_encoder = to_gma_encoder(encoder);
 	struct psb_intel_mode_device *mode_dev = &dev_priv->mode_dev;
 
 	if (!gma_power_begin(dev, true))
@@ -176,7 +174,7 @@
 	mode_dev->saveBLC_PWM_CTL = REG_READ(BLC_PWM_CTL);
 	mode_dev->backlight_duty_cycle = (mode_dev->saveBLC_PWM_CTL &
 					  BACKLIGHT_DUTY_CYCLE_MASK);
-	oaktrail_lvds_set_power(dev, psb_intel_encoder, false);
+	oaktrail_lvds_set_power(dev, gma_encoder, false);
 	gma_power_end(dev);
 }
 
@@ -203,14 +201,13 @@
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_encoder *psb_intel_encoder =
-						to_psb_intel_encoder(encoder);
+	struct gma_encoder *gma_encoder = to_gma_encoder(encoder);
 	struct psb_intel_mode_device *mode_dev = &dev_priv->mode_dev;
 
 	if (mode_dev->backlight_duty_cycle == 0)
 		mode_dev->backlight_duty_cycle =
 					oaktrail_lvds_get_max_backlight(dev);
-	oaktrail_lvds_set_power(dev, psb_intel_encoder, true);
+	oaktrail_lvds_set_power(dev, gma_encoder, true);
 }
 
 static const struct drm_encoder_helper_funcs oaktrail_lvds_helper_funcs = {
@@ -325,8 +322,8 @@
 void oaktrail_lvds_init(struct drm_device *dev,
 		    struct psb_intel_mode_device *mode_dev)
 {
-	struct psb_intel_encoder *psb_intel_encoder;
-	struct psb_intel_connector *psb_intel_connector;
+	struct gma_encoder *gma_encoder;
+	struct gma_connector *gma_connector;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
 	struct drm_psb_private *dev_priv = dev->dev_private;
@@ -334,16 +331,16 @@
 	struct i2c_adapter *i2c_adap;
 	struct drm_display_mode *scan;	/* *modes, *bios_mode; */
 
-	psb_intel_encoder = kzalloc(sizeof(struct psb_intel_encoder), GFP_KERNEL);
-	if (!psb_intel_encoder)
+	gma_encoder = kzalloc(sizeof(struct gma_encoder), GFP_KERNEL);
+	if (!gma_encoder)
 		return;
 
-	psb_intel_connector = kzalloc(sizeof(struct psb_intel_connector), GFP_KERNEL);
-	if (!psb_intel_connector)
+	gma_connector = kzalloc(sizeof(struct gma_connector), GFP_KERNEL);
+	if (!gma_connector)
 		goto failed_connector;
 
-	connector = &psb_intel_connector->base;
-	encoder = &psb_intel_encoder->base;
+	connector = &gma_connector->base;
+	encoder = &gma_encoder->base;
 	dev_priv->is_lvds_on = true;
 	drm_connector_init(dev, connector,
 			   &psb_intel_lvds_connector_funcs,
@@ -352,9 +349,8 @@
 	drm_encoder_init(dev, encoder, &psb_intel_lvds_enc_funcs,
 			 DRM_MODE_ENCODER_LVDS);
 
-	psb_intel_connector_attach_encoder(psb_intel_connector,
-					   psb_intel_encoder);
-	psb_intel_encoder->type = INTEL_OUTPUT_LVDS;
+	gma_connector_attach_encoder(gma_connector, gma_encoder);
+	gma_encoder->type = INTEL_OUTPUT_LVDS;
 
 	drm_encoder_helper_add(encoder, &oaktrail_lvds_helper_funcs);
 	drm_connector_helper_add(connector,
@@ -434,15 +430,15 @@
 
 failed_find:
 	dev_dbg(dev->dev, "No LVDS modes found, disabling.\n");
-	if (psb_intel_encoder->ddc_bus)
-		psb_intel_i2c_destroy(psb_intel_encoder->ddc_bus);
+	if (gma_encoder->ddc_bus)
+		psb_intel_i2c_destroy(gma_encoder->ddc_bus);
 
 /* failed_ddc: */
 
 	drm_encoder_cleanup(encoder);
 	drm_connector_cleanup(connector);
-	kfree(psb_intel_connector);
+	kfree(gma_connector);
 failed_connector:
-	kfree(psb_intel_encoder);
+	kfree(gma_encoder);
 }
 
diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c
index f6f534b..6976786 100644
--- a/drivers/gpu/drm/gma500/psb_device.c
+++ b/drivers/gpu/drm/gma500/psb_device.c
@@ -25,7 +25,7 @@
 #include "psb_reg.h"
 #include "psb_intel_reg.h"
 #include "intel_bios.h"
-
+#include "psb_device.h"
 
 static int psb_output_init(struct drm_device *dev)
 {
@@ -380,6 +380,7 @@
 
 	.crtc_helper = &psb_intel_helper_funcs,
 	.crtc_funcs = &psb_intel_crtc_funcs,
+	.clock_funcs = &psb_clock_funcs,
 
 	.output_init = psb_output_init,
 
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.h b/drivers/gpu/drm/gma500/psb_device.h
similarity index 76%
rename from drivers/gpu/drm/gma500/psb_intel_display.h
rename to drivers/gpu/drm/gma500/psb_device.h
index 3724b97..35e304c 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.h
+++ b/drivers/gpu/drm/gma500/psb_device.h
@@ -1,4 +1,6 @@
-/* copyright (c) 2008, Intel Corporation
+/*
+ * Copyright © 2013 Patrik Jakobsson
+ * Copyright © 2011 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -12,14 +14,11 @@
  * You should have received a copy of the GNU General Public License along with
  * this program; if not, write to the Free Software Foundation, Inc.,
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Authors:
- * Eric Anholt <eric@anholt.net>
  */
 
-#ifndef _INTEL_DISPLAY_H_
-#define _INTEL_DISPLAY_H_
+#ifndef _PSB_DEVICE_H_
+#define _PSB_DEVICE_H_
 
-bool psb_intel_pipe_has_type(struct drm_crtc *crtc, int type);
+extern const struct gma_clock_funcs psb_clock_funcs;
 
 #endif
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index bddea58..fcb4e9f 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -131,7 +131,7 @@
 static int psb_dpst_bl_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv);
 
-static struct drm_ioctl_desc psb_ioctls[] = {
+static const struct drm_ioctl_desc psb_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(GMA_ADB, psb_adb_ioctl, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(GMA_MODE_OPERATION, psb_mode_operation_ioctl,
 		      DRM_AUTH),
@@ -270,7 +270,7 @@
 	unsigned long irqflags;
 	int ret = -ENOMEM;
 	struct drm_connector *connector;
-	struct psb_intel_encoder *psb_intel_encoder;
+	struct gma_encoder *gma_encoder;
 
 	dev_priv = kzalloc(sizeof(*dev_priv), GFP_KERNEL);
 	if (dev_priv == NULL)
@@ -372,9 +372,9 @@
 	/* Only add backlight support if we have LVDS output */
 	list_for_each_entry(connector, &dev->mode_config.connector_list,
 			    head) {
-		psb_intel_encoder = psb_intel_attached_encoder(connector);
+		gma_encoder = gma_attached_encoder(connector);
 
-		switch (psb_intel_encoder->type) {
+		switch (gma_encoder->type) {
 		case INTEL_OUTPUT_LVDS:
 		case INTEL_OUTPUT_MIPI:
 			ret = gma_backlight_init(dev);
@@ -441,7 +441,7 @@
 	struct drm_mode_object *obj;
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
-	struct psb_intel_crtc *psb_intel_crtc;
+	struct gma_crtc *gma_crtc;
 	int i = 0;
 	int32_t obj_id;
 
@@ -454,12 +454,12 @@
 
 	connector = obj_to_connector(obj);
 	crtc = connector->encoder->crtc;
-	psb_intel_crtc = to_psb_intel_crtc(crtc);
+	gma_crtc = to_gma_crtc(crtc);
 
 	for (i = 0; i < 256; i++)
-		psb_intel_crtc->lut_adj[i] = lut_arg->lut[i];
+		gma_crtc->lut_adj[i] = lut_arg->lut[i];
 
-	psb_intel_crtc_load_lut(crtc);
+	gma_crtc_load_lut(crtc);
 
 	return 0;
 }
@@ -622,13 +622,12 @@
 	.unlocked_ioctl = psb_unlocked_ioctl,
 	.mmap = drm_gem_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 	.read = drm_read,
 };
 
 static struct drm_driver driver = {
 	.driver_features = DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | \
-			   DRIVER_IRQ_VBL | DRIVER_MODESET | DRIVER_GEM ,
+			   DRIVER_MODESET | DRIVER_GEM ,
 	.load = psb_driver_load,
 	.unload = psb_driver_unload,
 
@@ -652,7 +651,7 @@
 	.gem_vm_ops = &psb_gem_vm_ops,
 	.dumb_create = psb_gem_dumb_create,
 	.dumb_map_offset = psb_gem_dumb_map_gtt,
-	.dumb_destroy = psb_gem_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 	.fops = &psb_gem_fops,
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index 6053b8a..4535ac7 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -27,6 +27,7 @@
 #include <drm/gma_drm.h>
 #include "psb_reg.h"
 #include "psb_intel_drv.h"
+#include "gma_display.h"
 #include "intel_bios.h"
 #include "gtt.h"
 #include "power.h"
@@ -46,6 +47,7 @@
 #define IS_PSB(dev) (((dev)->pci_device & 0xfffe) == 0x8108)
 #define IS_MRST(dev) (((dev)->pci_device & 0xfffc) == 0x4100)
 #define IS_MFLD(dev) (((dev)->pci_device & 0xfff8) == 0x0130)
+#define IS_CDV(dev) (((dev)->pci_device & 0xfff0) == 0x0be0)
 
 /*
  * Driver definitions
@@ -675,6 +677,7 @@
 	/* Sub functions */
 	struct drm_crtc_helper_funcs const *crtc_helper;
 	struct drm_crtc_funcs const *crtc_funcs;
+	const struct gma_clock_funcs *clock_funcs;
 
 	/* Setup hooks */
 	int (*chip_setup)(struct drm_device *dev);
@@ -692,6 +695,8 @@
 	int (*restore_regs)(struct drm_device *dev);
 	int (*power_up)(struct drm_device *dev);
 	int (*power_down)(struct drm_device *dev);
+	void (*update_wm)(struct drm_device *dev, struct drm_crtc *crtc);
+	void (*disable_sr)(struct drm_device *dev);
 
 	void (*lvds_bl_power)(struct drm_device *dev, bool on);
 #ifdef CONFIG_BACKLIGHT_CLASS_DEVICE
@@ -838,8 +843,6 @@
 			struct drm_file *file);
 extern int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 			struct drm_mode_create_dumb *args);
-extern int psb_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
-			uint32_t handle);
 extern int psb_gem_dumb_map_gtt(struct drm_file *file, struct drm_device *dev,
 			uint32_t handle, uint64_t *offset);
 extern int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
diff --git a/drivers/gpu/drm/gma500/psb_intel_display.c b/drivers/gpu/drm/gma500/psb_intel_display.c
index 6666493..97f8a03 100644
--- a/drivers/gpu/drm/gma500/psb_intel_display.c
+++ b/drivers/gpu/drm/gma500/psb_intel_display.c
@@ -19,46 +19,19 @@
  */
 
 #include <linux/i2c.h>
-#include <linux/pm_runtime.h>
 
 #include <drm/drmP.h>
 #include "framebuffer.h"
 #include "psb_drv.h"
 #include "psb_intel_drv.h"
 #include "psb_intel_reg.h"
-#include "psb_intel_display.h"
+#include "gma_display.h"
 #include "power.h"
 
-struct psb_intel_clock_t {
-	/* given values */
-	int n;
-	int m1, m2;
-	int p1, p2;
-	/* derived values */
-	int dot;
-	int vco;
-	int m;
-	int p;
-};
-
-struct psb_intel_range_t {
-	int min, max;
-};
-
-struct psb_intel_p2_t {
-	int dot_limit;
-	int p2_slow, p2_fast;
-};
-
-struct psb_intel_limit_t {
-	struct psb_intel_range_t dot, vco, n, m, m1, m2, p, p1;
-	struct psb_intel_p2_t p2;
-};
-
 #define INTEL_LIMIT_I9XX_SDVO_DAC   0
 #define INTEL_LIMIT_I9XX_LVDS	    1
 
-static const struct psb_intel_limit_t psb_intel_limits[] = {
+static const struct gma_limit_t psb_intel_limits[] = {
 	{			/* INTEL_LIMIT_I9XX_SDVO_DAC */
 	 .dot = {.min = 20000, .max = 400000},
 	 .vco = {.min = 1400000, .max = 2800000},
@@ -68,8 +41,8 @@
 	 .m2 = {.min = 3, .max = 7},
 	 .p = {.min = 5, .max = 80},
 	 .p1 = {.min = 1, .max = 8},
-	 .p2 = {.dot_limit = 200000,
-		.p2_slow = 10, .p2_fast = 5},
+	 .p2 = {.dot_limit = 200000, .p2_slow = 10, .p2_fast = 5},
+	 .find_pll = gma_find_best_pll,
 	 },
 	{			/* INTEL_LIMIT_I9XX_LVDS */
 	 .dot = {.min = 20000, .max = 400000},
@@ -83,23 +56,24 @@
 	 /* The single-channel range is 25-112Mhz, and dual-channel
 	  * is 80-224Mhz.  Prefer single channel as much as possible.
 	  */
-	 .p2 = {.dot_limit = 112000,
-		.p2_slow = 14, .p2_fast = 7},
+	 .p2 = {.dot_limit = 112000, .p2_slow = 14, .p2_fast = 7},
+	 .find_pll = gma_find_best_pll,
 	 },
 };
 
-static const struct psb_intel_limit_t *psb_intel_limit(struct drm_crtc *crtc)
+static const struct gma_limit_t *psb_intel_limit(struct drm_crtc *crtc,
+						 int refclk)
 {
-	const struct psb_intel_limit_t *limit;
+	const struct gma_limit_t *limit;
 
-	if (psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
+	if (gma_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
 		limit = &psb_intel_limits[INTEL_LIMIT_I9XX_LVDS];
 	else
 		limit = &psb_intel_limits[INTEL_LIMIT_I9XX_SDVO_DAC];
 	return limit;
 }
 
-static void psb_intel_clock(int refclk, struct psb_intel_clock_t *clock)
+static void psb_intel_clock(int refclk, struct gma_clock_t *clock)
 {
 	clock->m = 5 * (clock->m1 + 2) + (clock->m2 + 2);
 	clock->p = clock->p1 * clock->p2;
@@ -108,353 +82,6 @@
 }
 
 /**
- * Returns whether any output on the specified pipe is of the specified type
- */
-bool psb_intel_pipe_has_type(struct drm_crtc *crtc, int type)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct drm_connector *l_entry;
-
-	list_for_each_entry(l_entry, &mode_config->connector_list, head) {
-		if (l_entry->encoder && l_entry->encoder->crtc == crtc) {
-			struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(l_entry);
-			if (psb_intel_encoder->type == type)
-				return true;
-		}
-	}
-	return false;
-}
-
-#define INTELPllInvalid(s)   { /* ErrorF (s) */; return false; }
-/**
- * Returns whether the given set of divisors are valid for a given refclk with
- * the given connectors.
- */
-
-static bool psb_intel_PLL_is_valid(struct drm_crtc *crtc,
-			       struct psb_intel_clock_t *clock)
-{
-	const struct psb_intel_limit_t *limit = psb_intel_limit(crtc);
-
-	if (clock->p1 < limit->p1.min || limit->p1.max < clock->p1)
-		INTELPllInvalid("p1 out of range\n");
-	if (clock->p < limit->p.min || limit->p.max < clock->p)
-		INTELPllInvalid("p out of range\n");
-	if (clock->m2 < limit->m2.min || limit->m2.max < clock->m2)
-		INTELPllInvalid("m2 out of range\n");
-	if (clock->m1 < limit->m1.min || limit->m1.max < clock->m1)
-		INTELPllInvalid("m1 out of range\n");
-	if (clock->m1 <= clock->m2)
-		INTELPllInvalid("m1 <= m2\n");
-	if (clock->m < limit->m.min || limit->m.max < clock->m)
-		INTELPllInvalid("m out of range\n");
-	if (clock->n < limit->n.min || limit->n.max < clock->n)
-		INTELPllInvalid("n out of range\n");
-	if (clock->vco < limit->vco.min || limit->vco.max < clock->vco)
-		INTELPllInvalid("vco out of range\n");
-	/* XXX: We may need to be checking "Dot clock"
-	 * depending on the multiplier, connector, etc.,
-	 * rather than just a single range.
-	 */
-	if (clock->dot < limit->dot.min || limit->dot.max < clock->dot)
-		INTELPllInvalid("dot out of range\n");
-
-	return true;
-}
-
-/**
- * Returns a set of divisors for the desired target clock with the given
- * refclk, or FALSE.  The returned values represent the clock equation:
- * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
- */
-static bool psb_intel_find_best_PLL(struct drm_crtc *crtc, int target,
-				int refclk,
-				struct psb_intel_clock_t *best_clock)
-{
-	struct drm_device *dev = crtc->dev;
-	struct psb_intel_clock_t clock;
-	const struct psb_intel_limit_t *limit = psb_intel_limit(crtc);
-	int err = target;
-
-	if (psb_intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) &&
-	    (REG_READ(LVDS) & LVDS_PORT_EN) != 0) {
-		/*
-		 * For LVDS, if the panel is on, just rely on its current
-		 * settings for dual-channel.  We haven't figured out how to
-		 * reliably set up different single/dual channel state, if we
-		 * even can.
-		 */
-		if ((REG_READ(LVDS) & LVDS_CLKB_POWER_MASK) ==
-		    LVDS_CLKB_POWER_UP)
-			clock.p2 = limit->p2.p2_fast;
-		else
-			clock.p2 = limit->p2.p2_slow;
-	} else {
-		if (target < limit->p2.dot_limit)
-			clock.p2 = limit->p2.p2_slow;
-		else
-			clock.p2 = limit->p2.p2_fast;
-	}
-
-	memset(best_clock, 0, sizeof(*best_clock));
-
-	for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max;
-	     clock.m1++) {
-		for (clock.m2 = limit->m2.min;
-		     clock.m2 < clock.m1 && clock.m2 <= limit->m2.max;
-		     clock.m2++) {
-			for (clock.n = limit->n.min;
-			     clock.n <= limit->n.max; clock.n++) {
-				for (clock.p1 = limit->p1.min;
-				     clock.p1 <= limit->p1.max;
-				     clock.p1++) {
-					int this_err;
-
-					psb_intel_clock(refclk, &clock);
-
-					if (!psb_intel_PLL_is_valid
-					    (crtc, &clock))
-						continue;
-
-					this_err = abs(clock.dot - target);
-					if (this_err < err) {
-						*best_clock = clock;
-						err = this_err;
-					}
-				}
-			}
-		}
-	}
-
-	return err != target;
-}
-
-void psb_intel_wait_for_vblank(struct drm_device *dev)
-{
-	/* Wait for 20ms, i.e. one cycle at 50hz. */
-	mdelay(20);
-}
-
-static int psb_intel_pipe_set_base(struct drm_crtc *crtc,
-			    int x, int y, struct drm_framebuffer *old_fb)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	struct psb_framebuffer *psbfb = to_psb_fb(crtc->fb);
-	int pipe = psb_intel_crtc->pipe;
-	const struct psb_offset *map = &dev_priv->regmap[pipe];
-	unsigned long start, offset;
-	u32 dspcntr;
-	int ret = 0;
-
-	if (!gma_power_begin(dev, true))
-		return 0;
-
-	/* no fb bound */
-	if (!crtc->fb) {
-		dev_dbg(dev->dev, "No FB bound\n");
-		goto psb_intel_pipe_cleaner;
-	}
-
-	/* We are displaying this buffer, make sure it is actually loaded
-	   into the GTT */
-	ret = psb_gtt_pin(psbfb->gtt);
-	if (ret < 0)
-		goto psb_intel_pipe_set_base_exit;
-	start = psbfb->gtt->offset;
-
-	offset = y * crtc->fb->pitches[0] + x * (crtc->fb->bits_per_pixel / 8);
-
-	REG_WRITE(map->stride, crtc->fb->pitches[0]);
-
-	dspcntr = REG_READ(map->cntr);
-	dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
-
-	switch (crtc->fb->bits_per_pixel) {
-	case 8:
-		dspcntr |= DISPPLANE_8BPP;
-		break;
-	case 16:
-		if (crtc->fb->depth == 15)
-			dspcntr |= DISPPLANE_15_16BPP;
-		else
-			dspcntr |= DISPPLANE_16BPP;
-		break;
-	case 24:
-	case 32:
-		dspcntr |= DISPPLANE_32BPP_NO_ALPHA;
-		break;
-	default:
-		dev_err(dev->dev, "Unknown color depth\n");
-		ret = -EINVAL;
-		psb_gtt_unpin(psbfb->gtt);
-		goto psb_intel_pipe_set_base_exit;
-	}
-	REG_WRITE(map->cntr, dspcntr);
-
-	REG_WRITE(map->base, start + offset);
-	REG_READ(map->base);
-
-psb_intel_pipe_cleaner:
-	/* If there was a previous display we can now unpin it */
-	if (old_fb)
-		psb_gtt_unpin(to_psb_fb(old_fb)->gtt);
-
-psb_intel_pipe_set_base_exit:
-	gma_power_end(dev);
-	return ret;
-}
-
-/**
- * Sets the power management mode of the pipe and plane.
- *
- * This code should probably grow support for turning the cursor off and back
- * on appropriately at the same time as we're turning the pipe off/on.
- */
-static void psb_intel_crtc_dpms(struct drm_crtc *crtc, int mode)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
-	const struct psb_offset *map = &dev_priv->regmap[pipe];
-	u32 temp;
-
-	/* XXX: When our outputs are all unaware of DPMS modes other than off
-	 * and on, we should map those modes to DRM_MODE_DPMS_OFF in the CRTC.
-	 */
-	switch (mode) {
-	case DRM_MODE_DPMS_ON:
-	case DRM_MODE_DPMS_STANDBY:
-	case DRM_MODE_DPMS_SUSPEND:
-		/* Enable the DPLL */
-		temp = REG_READ(map->dpll);
-		if ((temp & DPLL_VCO_ENABLE) == 0) {
-			REG_WRITE(map->dpll, temp);
-			REG_READ(map->dpll);
-			/* Wait for the clocks to stabilize. */
-			udelay(150);
-			REG_WRITE(map->dpll, temp | DPLL_VCO_ENABLE);
-			REG_READ(map->dpll);
-			/* Wait for the clocks to stabilize. */
-			udelay(150);
-			REG_WRITE(map->dpll, temp | DPLL_VCO_ENABLE);
-			REG_READ(map->dpll);
-			/* Wait for the clocks to stabilize. */
-			udelay(150);
-		}
-
-		/* Enable the pipe */
-		temp = REG_READ(map->conf);
-		if ((temp & PIPEACONF_ENABLE) == 0)
-			REG_WRITE(map->conf, temp | PIPEACONF_ENABLE);
-
-		/* Enable the plane */
-		temp = REG_READ(map->cntr);
-		if ((temp & DISPLAY_PLANE_ENABLE) == 0) {
-			REG_WRITE(map->cntr,
-				  temp | DISPLAY_PLANE_ENABLE);
-			/* Flush the plane changes */
-			REG_WRITE(map->base, REG_READ(map->base));
-		}
-
-		psb_intel_crtc_load_lut(crtc);
-
-		/* Give the overlay scaler a chance to enable
-		 * if it's on this pipe */
-		/* psb_intel_crtc_dpms_video(crtc, true); TODO */
-		break;
-	case DRM_MODE_DPMS_OFF:
-		/* Give the overlay scaler a chance to disable
-		 * if it's on this pipe */
-		/* psb_intel_crtc_dpms_video(crtc, FALSE); TODO */
-
-		/* Disable the VGA plane that we never use */
-		REG_WRITE(VGACNTRL, VGA_DISP_DISABLE);
-
-		/* Disable display plane */
-		temp = REG_READ(map->cntr);
-		if ((temp & DISPLAY_PLANE_ENABLE) != 0) {
-			REG_WRITE(map->cntr,
-				  temp & ~DISPLAY_PLANE_ENABLE);
-			/* Flush the plane changes */
-			REG_WRITE(map->base, REG_READ(map->base));
-			REG_READ(map->base);
-		}
-
-		/* Next, disable display pipes */
-		temp = REG_READ(map->conf);
-		if ((temp & PIPEACONF_ENABLE) != 0) {
-			REG_WRITE(map->conf, temp & ~PIPEACONF_ENABLE);
-			REG_READ(map->conf);
-		}
-
-		/* Wait for vblank for the disable to take effect. */
-		psb_intel_wait_for_vblank(dev);
-
-		temp = REG_READ(map->dpll);
-		if ((temp & DPLL_VCO_ENABLE) != 0) {
-			REG_WRITE(map->dpll, temp & ~DPLL_VCO_ENABLE);
-			REG_READ(map->dpll);
-		}
-
-		/* Wait for the clocks to turn off. */
-		udelay(150);
-		break;
-	}
-
-	/*Set FIFO Watermarks*/
-	REG_WRITE(DSPARB, 0x3F3E);
-}
-
-static void psb_intel_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
-}
-
-static void psb_intel_crtc_commit(struct drm_crtc *crtc)
-{
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
-}
-
-void psb_intel_encoder_prepare(struct drm_encoder *encoder)
-{
-	struct drm_encoder_helper_funcs *encoder_funcs =
-	    encoder->helper_private;
-	/* lvds has its own version of prepare see psb_intel_lvds_prepare */
-	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
-}
-
-void psb_intel_encoder_commit(struct drm_encoder *encoder)
-{
-	struct drm_encoder_helper_funcs *encoder_funcs =
-	    encoder->helper_private;
-	/* lvds has its own version of commit see psb_intel_lvds_commit */
-	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON);
-}
-
-void psb_intel_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct psb_intel_encoder *intel_encoder = to_psb_intel_encoder(encoder);
-
-	drm_encoder_cleanup(encoder);
-	kfree(intel_encoder);
-}
-
-static bool psb_intel_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
-
-/**
  * Return the pipe currently connected to the panel fitter,
  * or -1 if the panel fitter is not present or not in use
  */
@@ -479,17 +106,18 @@
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-	int pipe = psb_intel_crtc->pipe;
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	int refclk;
-	struct psb_intel_clock_t clock;
+	struct gma_clock_t clock;
 	u32 dpll = 0, fp = 0, dspcntr, pipeconf;
 	bool ok, is_sdvo = false;
 	bool is_lvds = false, is_tv = false;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct drm_connector *connector;
+	const struct gma_limit_t *limit;
 
 	/* No scan out no play */
 	if (crtc->fb == NULL) {
@@ -498,14 +126,13 @@
 	}
 
 	list_for_each_entry(connector, &mode_config->connector_list, head) {
-		struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+		struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 
 		if (!connector->encoder
 		    || connector->encoder->crtc != crtc)
 			continue;
 
-		switch (psb_intel_encoder->type) {
+		switch (gma_encoder->type) {
 		case INTEL_OUTPUT_LVDS:
 			is_lvds = true;
 			break;
@@ -520,10 +147,13 @@
 
 	refclk = 96000;
 
-	ok = psb_intel_find_best_PLL(crtc, adjusted_mode->clock, refclk,
+	limit = gma_crtc->clock_funcs->limit(crtc, refclk);
+
+	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk,
 				 &clock);
 	if (!ok) {
-		dev_err(dev->dev, "Couldn't find PLL settings for mode!\n");
+		DRM_ERROR("Couldn't find PLL settings for mode! target: %d, actual: %d",
+			  adjusted_mode->clock, clock.dot);
 		return 0;
 	}
 
@@ -661,368 +291,29 @@
 	REG_WRITE(map->conf, pipeconf);
 	REG_READ(map->conf);
 
-	psb_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 	REG_WRITE(map->cntr, dspcntr);
 
 	/* Flush the plane changes */
 	crtc_funcs->mode_set_base(crtc, x, y, old_fb);
 
-	psb_intel_wait_for_vblank(dev);
+	gma_wait_for_vblank(dev);
 
 	return 0;
 }
 
-/** Loads the palette/gamma unit for the CRTC with the prepared values */
-void psb_intel_crtc_load_lut(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	const struct psb_offset *map = &dev_priv->regmap[psb_intel_crtc->pipe];
-	int palreg = map->palette;
-	int i;
-
-	/* The clocks have to be on to load the palette. */
-	if (!crtc->enabled)
-		return;
-
-	switch (psb_intel_crtc->pipe) {
-	case 0:
-	case 1:
-		break;
-	default:
-		dev_err(dev->dev, "Illegal Pipe Number.\n");
-		return;
-	}
-
-	if (gma_power_begin(dev, false)) {
-		for (i = 0; i < 256; i++) {
-			REG_WRITE(palreg + 4 * i,
-				  ((psb_intel_crtc->lut_r[i] +
-				  psb_intel_crtc->lut_adj[i]) << 16) |
-				  ((psb_intel_crtc->lut_g[i] +
-				  psb_intel_crtc->lut_adj[i]) << 8) |
-				  (psb_intel_crtc->lut_b[i] +
-				  psb_intel_crtc->lut_adj[i]));
-		}
-		gma_power_end(dev);
-	} else {
-		for (i = 0; i < 256; i++) {
-			dev_priv->regs.pipe[0].palette[i] =
-				  ((psb_intel_crtc->lut_r[i] +
-				  psb_intel_crtc->lut_adj[i]) << 16) |
-				  ((psb_intel_crtc->lut_g[i] +
-				  psb_intel_crtc->lut_adj[i]) << 8) |
-				  (psb_intel_crtc->lut_b[i] +
-				  psb_intel_crtc->lut_adj[i]);
-		}
-
-	}
-}
-
-/**
- * Save HW states of giving crtc
- */
-static void psb_intel_crtc_save(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	struct psb_intel_crtc_state *crtc_state = psb_intel_crtc->crtc_state;
-	const struct psb_offset *map = &dev_priv->regmap[psb_intel_crtc->pipe];
-	uint32_t paletteReg;
-	int i;
-
-	if (!crtc_state) {
-		dev_err(dev->dev, "No CRTC state found\n");
-		return;
-	}
-
-	crtc_state->saveDSPCNTR = REG_READ(map->cntr);
-	crtc_state->savePIPECONF = REG_READ(map->conf);
-	crtc_state->savePIPESRC = REG_READ(map->src);
-	crtc_state->saveFP0 = REG_READ(map->fp0);
-	crtc_state->saveFP1 = REG_READ(map->fp1);
-	crtc_state->saveDPLL = REG_READ(map->dpll);
-	crtc_state->saveHTOTAL = REG_READ(map->htotal);
-	crtc_state->saveHBLANK = REG_READ(map->hblank);
-	crtc_state->saveHSYNC = REG_READ(map->hsync);
-	crtc_state->saveVTOTAL = REG_READ(map->vtotal);
-	crtc_state->saveVBLANK = REG_READ(map->vblank);
-	crtc_state->saveVSYNC = REG_READ(map->vsync);
-	crtc_state->saveDSPSTRIDE = REG_READ(map->stride);
-
-	/*NOTE: DSPSIZE DSPPOS only for psb*/
-	crtc_state->saveDSPSIZE = REG_READ(map->size);
-	crtc_state->saveDSPPOS = REG_READ(map->pos);
-
-	crtc_state->saveDSPBASE = REG_READ(map->base);
-
-	paletteReg = map->palette;
-	for (i = 0; i < 256; ++i)
-		crtc_state->savePalette[i] = REG_READ(paletteReg + (i << 2));
-}
-
-/**
- * Restore HW states of giving crtc
- */
-static void psb_intel_crtc_restore(struct drm_crtc *crtc)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc =  to_psb_intel_crtc(crtc);
-	struct psb_intel_crtc_state *crtc_state = psb_intel_crtc->crtc_state;
-	const struct psb_offset *map = &dev_priv->regmap[psb_intel_crtc->pipe];
-	uint32_t paletteReg;
-	int i;
-
-	if (!crtc_state) {
-		dev_err(dev->dev, "No crtc state\n");
-		return;
-	}
-
-	if (crtc_state->saveDPLL & DPLL_VCO_ENABLE) {
-		REG_WRITE(map->dpll,
-			crtc_state->saveDPLL & ~DPLL_VCO_ENABLE);
-		REG_READ(map->dpll);
-		udelay(150);
-	}
-
-	REG_WRITE(map->fp0, crtc_state->saveFP0);
-	REG_READ(map->fp0);
-
-	REG_WRITE(map->fp1, crtc_state->saveFP1);
-	REG_READ(map->fp1);
-
-	REG_WRITE(map->dpll, crtc_state->saveDPLL);
-	REG_READ(map->dpll);
-	udelay(150);
-
-	REG_WRITE(map->htotal, crtc_state->saveHTOTAL);
-	REG_WRITE(map->hblank, crtc_state->saveHBLANK);
-	REG_WRITE(map->hsync, crtc_state->saveHSYNC);
-	REG_WRITE(map->vtotal, crtc_state->saveVTOTAL);
-	REG_WRITE(map->vblank, crtc_state->saveVBLANK);
-	REG_WRITE(map->vsync, crtc_state->saveVSYNC);
-	REG_WRITE(map->stride, crtc_state->saveDSPSTRIDE);
-
-	REG_WRITE(map->size, crtc_state->saveDSPSIZE);
-	REG_WRITE(map->pos, crtc_state->saveDSPPOS);
-
-	REG_WRITE(map->src, crtc_state->savePIPESRC);
-	REG_WRITE(map->base, crtc_state->saveDSPBASE);
-	REG_WRITE(map->conf, crtc_state->savePIPECONF);
-
-	psb_intel_wait_for_vblank(dev);
-
-	REG_WRITE(map->cntr, crtc_state->saveDSPCNTR);
-	REG_WRITE(map->base, crtc_state->saveDSPBASE);
-
-	psb_intel_wait_for_vblank(dev);
-
-	paletteReg = map->palette;
-	for (i = 0; i < 256; ++i)
-		REG_WRITE(paletteReg + (i << 2), crtc_state->savePalette[i]);
-}
-
-static int psb_intel_crtc_cursor_set(struct drm_crtc *crtc,
-				 struct drm_file *file_priv,
-				 uint32_t handle,
-				 uint32_t width, uint32_t height)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
-	uint32_t control = (pipe == 0) ? CURACNTR : CURBCNTR;
-	uint32_t base = (pipe == 0) ? CURABASE : CURBBASE;
-	uint32_t temp;
-	size_t addr = 0;
-	struct gtt_range *gt;
-	struct gtt_range *cursor_gt = psb_intel_crtc->cursor_gt;
-	struct drm_gem_object *obj;
-	void *tmp_dst, *tmp_src;
-	int ret = 0, i, cursor_pages;
-
-	/* if we want to turn of the cursor ignore width and height */
-	if (!handle) {
-		/* turn off the cursor */
-		temp = CURSOR_MODE_DISABLE;
-
-		if (gma_power_begin(dev, false)) {
-			REG_WRITE(control, temp);
-			REG_WRITE(base, 0);
-			gma_power_end(dev);
-		}
-
-		/* Unpin the old GEM object */
-		if (psb_intel_crtc->cursor_obj) {
-			gt = container_of(psb_intel_crtc->cursor_obj,
-							struct gtt_range, gem);
-			psb_gtt_unpin(gt);
-			drm_gem_object_unreference(psb_intel_crtc->cursor_obj);
-			psb_intel_crtc->cursor_obj = NULL;
-		}
-
-		return 0;
-	}
-
-	/* Currently we only support 64x64 cursors */
-	if (width != 64 || height != 64) {
-		dev_dbg(dev->dev, "we currently only support 64x64 cursors\n");
-		return -EINVAL;
-	}
-
-	obj = drm_gem_object_lookup(dev, file_priv, handle);
-	if (!obj)
-		return -ENOENT;
-
-	if (obj->size < width * height * 4) {
-		dev_dbg(dev->dev, "buffer is to small\n");
-		ret = -ENOMEM;
-		goto unref_cursor;
-	}
-
-	gt = container_of(obj, struct gtt_range, gem);
-
-	/* Pin the memory into the GTT */
-	ret = psb_gtt_pin(gt);
-	if (ret) {
-		dev_err(dev->dev, "Can not pin down handle 0x%x\n", handle);
-		goto unref_cursor;
-	}
-
-	if (dev_priv->ops->cursor_needs_phys) {
-		if (cursor_gt == NULL) {
-			dev_err(dev->dev, "No hardware cursor mem available");
-			ret = -ENOMEM;
-			goto unref_cursor;
-		}
-
-		/* Prevent overflow */
-		if (gt->npage > 4)
-			cursor_pages = 4;
-		else
-			cursor_pages = gt->npage;
-
-		/* Copy the cursor to cursor mem */
-		tmp_dst = dev_priv->vram_addr + cursor_gt->offset;
-		for (i = 0; i < cursor_pages; i++) {
-			tmp_src = kmap(gt->pages[i]);
-			memcpy(tmp_dst, tmp_src, PAGE_SIZE);
-			kunmap(gt->pages[i]);
-			tmp_dst += PAGE_SIZE;
-		}
-
-		addr = psb_intel_crtc->cursor_addr;
-	} else {
-		addr = gt->offset;      /* Or resource.start ??? */
-		psb_intel_crtc->cursor_addr = addr;
-	}
-
-	temp = 0;
-	/* set the pipe for the cursor */
-	temp |= (pipe << 28);
-	temp |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
-
-	if (gma_power_begin(dev, false)) {
-		REG_WRITE(control, temp);
-		REG_WRITE(base, addr);
-		gma_power_end(dev);
-	}
-
-	/* unpin the old bo */
-	if (psb_intel_crtc->cursor_obj) {
-		gt = container_of(psb_intel_crtc->cursor_obj,
-							struct gtt_range, gem);
-		psb_gtt_unpin(gt);
-		drm_gem_object_unreference(psb_intel_crtc->cursor_obj);
-	}
-
-	psb_intel_crtc->cursor_obj = obj;
-	return ret;
-
-unref_cursor:
-	drm_gem_object_unreference(obj);
-	return ret;
-}
-
-static int psb_intel_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
-{
-	struct drm_device *dev = crtc->dev;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
-	uint32_t temp = 0;
-	uint32_t addr;
-
-
-	if (x < 0) {
-		temp |= (CURSOR_POS_SIGN << CURSOR_X_SHIFT);
-		x = -x;
-	}
-	if (y < 0) {
-		temp |= (CURSOR_POS_SIGN << CURSOR_Y_SHIFT);
-		y = -y;
-	}
-
-	temp |= ((x & CURSOR_POS_MASK) << CURSOR_X_SHIFT);
-	temp |= ((y & CURSOR_POS_MASK) << CURSOR_Y_SHIFT);
-
-	addr = psb_intel_crtc->cursor_addr;
-
-	if (gma_power_begin(dev, false)) {
-		REG_WRITE((pipe == 0) ? CURAPOS : CURBPOS, temp);
-		REG_WRITE((pipe == 0) ? CURABASE : CURBBASE, addr);
-		gma_power_end(dev);
-	}
-	return 0;
-}
-
-static void psb_intel_crtc_gamma_set(struct drm_crtc *crtc, u16 *red,
-			 u16 *green, u16 *blue, uint32_t type, uint32_t size)
-{
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int i;
-
-	if (size != 256)
-		return;
-
-	for (i = 0; i < 256; i++) {
-		psb_intel_crtc->lut_r[i] = red[i] >> 8;
-		psb_intel_crtc->lut_g[i] = green[i] >> 8;
-		psb_intel_crtc->lut_b[i] = blue[i] >> 8;
-	}
-
-	psb_intel_crtc_load_lut(crtc);
-}
-
-static int psb_crtc_set_config(struct drm_mode_set *set)
-{
-	int ret;
-	struct drm_device *dev = set->crtc->dev;
-	struct drm_psb_private *dev_priv = dev->dev_private;
-
-	if (!dev_priv->rpm_enabled)
-		return drm_crtc_helper_set_config(set);
-
-	pm_runtime_forbid(&dev->pdev->dev);
-	ret = drm_crtc_helper_set_config(set);
-	pm_runtime_allow(&dev->pdev->dev);
-	return ret;
-}
-
 /* Returns the clock of the currently programmed mode of the given pipe. */
 static int psb_intel_crtc_clock_get(struct drm_device *dev,
 				struct drm_crtc *crtc)
 {
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	int pipe = psb_intel_crtc->pipe;
+	int pipe = gma_crtc->pipe;
 	const struct psb_offset *map = &dev_priv->regmap[pipe];
 	u32 dpll;
 	u32 fp;
-	struct psb_intel_clock_t clock;
+	struct gma_clock_t clock;
 	bool is_lvds;
 	struct psb_pipe *p = &dev_priv->regs.pipe[pipe];
 
@@ -1092,8 +383,8 @@
 struct drm_display_mode *psb_intel_crtc_mode_get(struct drm_device *dev,
 					     struct drm_crtc *crtc)
 {
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	int pipe = psb_intel_crtc->pipe;
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+	int pipe = gma_crtc->pipe;
 	struct drm_display_mode *mode;
 	int htot;
 	int hsync;
@@ -1136,58 +427,30 @@
 	return mode;
 }
 
-static void psb_intel_crtc_destroy(struct drm_crtc *crtc)
-{
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-	struct gtt_range *gt;
-
-	/* Unpin the old GEM object */
-	if (psb_intel_crtc->cursor_obj) {
-		gt = container_of(psb_intel_crtc->cursor_obj,
-						struct gtt_range, gem);
-		psb_gtt_unpin(gt);
-		drm_gem_object_unreference(psb_intel_crtc->cursor_obj);
-		psb_intel_crtc->cursor_obj = NULL;
-	}
-
-	if (psb_intel_crtc->cursor_gt != NULL)
-		psb_gtt_free_range(crtc->dev, psb_intel_crtc->cursor_gt);
-	kfree(psb_intel_crtc->crtc_state);
-	drm_crtc_cleanup(crtc);
-	kfree(psb_intel_crtc);
-}
-
-static void psb_intel_crtc_disable(struct drm_crtc *crtc)
-{
-	struct gtt_range *gt;
-	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
-
-	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
-
-	if (crtc->fb) {
-		gt = to_psb_fb(crtc->fb)->gtt;
-		psb_gtt_unpin(gt);
-	}
-}
-
 const struct drm_crtc_helper_funcs psb_intel_helper_funcs = {
-	.dpms = psb_intel_crtc_dpms,
-	.mode_fixup = psb_intel_crtc_mode_fixup,
+	.dpms = gma_crtc_dpms,
+	.mode_fixup = gma_crtc_mode_fixup,
 	.mode_set = psb_intel_crtc_mode_set,
-	.mode_set_base = psb_intel_pipe_set_base,
-	.prepare = psb_intel_crtc_prepare,
-	.commit = psb_intel_crtc_commit,
-	.disable = psb_intel_crtc_disable,
+	.mode_set_base = gma_pipe_set_base,
+	.prepare = gma_crtc_prepare,
+	.commit = gma_crtc_commit,
+	.disable = gma_crtc_disable,
 };
 
 const struct drm_crtc_funcs psb_intel_crtc_funcs = {
-	.save = psb_intel_crtc_save,
-	.restore = psb_intel_crtc_restore,
-	.cursor_set = psb_intel_crtc_cursor_set,
-	.cursor_move = psb_intel_crtc_cursor_move,
-	.gamma_set = psb_intel_crtc_gamma_set,
-	.set_config = psb_crtc_set_config,
-	.destroy = psb_intel_crtc_destroy,
+	.save = gma_crtc_save,
+	.restore = gma_crtc_restore,
+	.cursor_set = gma_crtc_cursor_set,
+	.cursor_move = gma_crtc_cursor_move,
+	.gamma_set = gma_crtc_gamma_set,
+	.set_config = gma_crtc_set_config,
+	.destroy = gma_crtc_destroy,
+};
+
+const struct gma_clock_funcs psb_clock_funcs = {
+	.clock = psb_intel_clock,
+	.limit = psb_intel_limit,
+	.pll_is_valid = gma_pll_is_valid,
 };
 
 /*
@@ -1195,7 +458,7 @@
  * to zero. This is a workaround for h/w defect on Oaktrail
  */
 static void psb_intel_cursor_init(struct drm_device *dev,
-				  struct psb_intel_crtc *psb_intel_crtc)
+				  struct gma_crtc *gma_crtc)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	u32 control[3] = { CURACNTR, CURBCNTR, CURCCNTR };
@@ -1208,88 +471,87 @@
 		 */
 		cursor_gt = psb_gtt_alloc_range(dev, 4 * PAGE_SIZE, "cursor", 1);
 		if (!cursor_gt) {
-			psb_intel_crtc->cursor_gt = NULL;
+			gma_crtc->cursor_gt = NULL;
 			goto out;
 		}
-		psb_intel_crtc->cursor_gt = cursor_gt;
-		psb_intel_crtc->cursor_addr = dev_priv->stolen_base +
+		gma_crtc->cursor_gt = cursor_gt;
+		gma_crtc->cursor_addr = dev_priv->stolen_base +
 							cursor_gt->offset;
 	} else {
-		psb_intel_crtc->cursor_gt = NULL;
+		gma_crtc->cursor_gt = NULL;
 	}
 
 out:
-	REG_WRITE(control[psb_intel_crtc->pipe], 0);
-	REG_WRITE(base[psb_intel_crtc->pipe], 0);
+	REG_WRITE(control[gma_crtc->pipe], 0);
+	REG_WRITE(base[gma_crtc->pipe], 0);
 }
 
 void psb_intel_crtc_init(struct drm_device *dev, int pipe,
 		     struct psb_intel_mode_device *mode_dev)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_crtc *psb_intel_crtc;
+	struct gma_crtc *gma_crtc;
 	int i;
 	uint16_t *r_base, *g_base, *b_base;
 
 	/* We allocate a extra array of drm_connector pointers
 	 * for fbdev after the crtc */
-	psb_intel_crtc =
-	    kzalloc(sizeof(struct psb_intel_crtc) +
-		    (INTELFB_CONN_LIMIT * sizeof(struct drm_connector *)),
-		    GFP_KERNEL);
-	if (psb_intel_crtc == NULL)
+	gma_crtc = kzalloc(sizeof(struct gma_crtc) +
+			(INTELFB_CONN_LIMIT * sizeof(struct drm_connector *)),
+			GFP_KERNEL);
+	if (gma_crtc == NULL)
 		return;
 
-	psb_intel_crtc->crtc_state =
+	gma_crtc->crtc_state =
 		kzalloc(sizeof(struct psb_intel_crtc_state), GFP_KERNEL);
-	if (!psb_intel_crtc->crtc_state) {
+	if (!gma_crtc->crtc_state) {
 		dev_err(dev->dev, "Crtc state error: No memory\n");
-		kfree(psb_intel_crtc);
+		kfree(gma_crtc);
 		return;
 	}
 
 	/* Set the CRTC operations from the chip specific data */
-	drm_crtc_init(dev, &psb_intel_crtc->base, dev_priv->ops->crtc_funcs);
+	drm_crtc_init(dev, &gma_crtc->base, dev_priv->ops->crtc_funcs);
 
-	drm_mode_crtc_set_gamma_size(&psb_intel_crtc->base, 256);
-	psb_intel_crtc->pipe = pipe;
-	psb_intel_crtc->plane = pipe;
+	/* Set the CRTC clock functions from chip specific data */
+	gma_crtc->clock_funcs = dev_priv->ops->clock_funcs;
 
-	r_base = psb_intel_crtc->base.gamma_store;
+	drm_mode_crtc_set_gamma_size(&gma_crtc->base, 256);
+	gma_crtc->pipe = pipe;
+	gma_crtc->plane = pipe;
+
+	r_base = gma_crtc->base.gamma_store;
 	g_base = r_base + 256;
 	b_base = g_base + 256;
 	for (i = 0; i < 256; i++) {
-		psb_intel_crtc->lut_r[i] = i;
-		psb_intel_crtc->lut_g[i] = i;
-		psb_intel_crtc->lut_b[i] = i;
+		gma_crtc->lut_r[i] = i;
+		gma_crtc->lut_g[i] = i;
+		gma_crtc->lut_b[i] = i;
 		r_base[i] = i << 8;
 		g_base[i] = i << 8;
 		b_base[i] = i << 8;
 
-		psb_intel_crtc->lut_adj[i] = 0;
+		gma_crtc->lut_adj[i] = 0;
 	}
 
-	psb_intel_crtc->mode_dev = mode_dev;
-	psb_intel_crtc->cursor_addr = 0;
+	gma_crtc->mode_dev = mode_dev;
+	gma_crtc->cursor_addr = 0;
 
-	drm_crtc_helper_add(&psb_intel_crtc->base,
+	drm_crtc_helper_add(&gma_crtc->base,
 						dev_priv->ops->crtc_helper);
 
 	/* Setup the array of drm_connector pointer array */
-	psb_intel_crtc->mode_set.crtc = &psb_intel_crtc->base;
+	gma_crtc->mode_set.crtc = &gma_crtc->base;
 	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
-	       dev_priv->plane_to_crtc_mapping[psb_intel_crtc->plane] != NULL);
-	dev_priv->plane_to_crtc_mapping[psb_intel_crtc->plane] =
-							&psb_intel_crtc->base;
-	dev_priv->pipe_to_crtc_mapping[psb_intel_crtc->pipe] =
-							&psb_intel_crtc->base;
-	psb_intel_crtc->mode_set.connectors =
-	    (struct drm_connector **) (psb_intel_crtc + 1);
-	psb_intel_crtc->mode_set.num_connectors = 0;
-	psb_intel_cursor_init(dev, psb_intel_crtc);
+	       dev_priv->plane_to_crtc_mapping[gma_crtc->plane] != NULL);
+	dev_priv->plane_to_crtc_mapping[gma_crtc->plane] = &gma_crtc->base;
+	dev_priv->pipe_to_crtc_mapping[gma_crtc->pipe] = &gma_crtc->base;
+	gma_crtc->mode_set.connectors = (struct drm_connector **)(gma_crtc + 1);
+	gma_crtc->mode_set.num_connectors = 0;
+	psb_intel_cursor_init(dev, gma_crtc);
 
 	/* Set to true so that the pipe is forced off on initial config. */
-	psb_intel_crtc->active = true;
+	gma_crtc->active = true;
 }
 
 int psb_intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data,
@@ -1298,7 +560,7 @@
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct drm_psb_get_pipe_from_crtc_id_arg *pipe_from_crtc_id = data;
 	struct drm_mode_object *drmmode_obj;
-	struct psb_intel_crtc *crtc;
+	struct gma_crtc *crtc;
 
 	if (!dev_priv) {
 		dev_err(dev->dev, "called with no initialization\n");
@@ -1313,7 +575,7 @@
 		return -EINVAL;
 	}
 
-	crtc = to_psb_intel_crtc(obj_to_crtc(drmmode_obj));
+	crtc = to_gma_crtc(obj_to_crtc(drmmode_obj));
 	pipe_from_crtc_id->pipe = crtc->pipe;
 
 	return 0;
@@ -1324,14 +586,14 @@
 	struct drm_crtc *crtc = NULL;
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
-		if (psb_intel_crtc->pipe == pipe)
+		struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
+		if (gma_crtc->pipe == pipe)
 			break;
 	}
 	return crtc;
 }
 
-int psb_intel_connector_clones(struct drm_device *dev, int type_mask)
+int gma_connector_clones(struct drm_device *dev, int type_mask)
 {
 	int index_mask = 0;
 	struct drm_connector *connector;
@@ -1339,30 +601,10 @@
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list,
 			    head) {
-		struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-		if (type_mask & (1 << psb_intel_encoder->type))
+		struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+		if (type_mask & (1 << gma_encoder->type))
 			index_mask |= (1 << entry);
 		entry++;
 	}
 	return index_mask;
 }
-
-/* current intel driver doesn't take advantage of encoders
-   always give back the encoder for the connector
-*/
-struct drm_encoder *psb_intel_best_encoder(struct drm_connector *connector)
-{
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-
-	return &psb_intel_encoder->base;
-}
-
-void psb_intel_connector_attach_encoder(struct psb_intel_connector *connector,
-					struct psb_intel_encoder *encoder)
-{
-	connector->encoder = encoder;
-	drm_mode_connector_attach_encoder(&connector->base,
-					  &encoder->base);
-}
diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h
index 4dcae42..bde27fd 100644
--- a/drivers/gpu/drm/gma500/psb_intel_drv.h
+++ b/drivers/gpu/drm/gma500/psb_intel_drv.h
@@ -24,6 +24,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <linux/gpio.h>
+#include "gma_display.h"
 
 /*
  * Display related stuff
@@ -116,11 +117,11 @@
 	u8 slave_addr;
 };
 
-struct psb_intel_encoder {
+struct gma_encoder {
 	struct drm_encoder base;
 	int type;
 	bool needs_tv_clock;
-	void (*hot_plug)(struct psb_intel_encoder *);
+	void (*hot_plug)(struct gma_encoder *);
 	int crtc_mask;
 	int clone_mask;
 	u32 ddi_select;	/* Channel info */
@@ -136,9 +137,9 @@
 	struct psb_intel_i2c_chan *ddc_bus;
 };
 
-struct psb_intel_connector {
+struct gma_connector {
 	struct drm_connector base;
-	struct psb_intel_encoder *encoder;
+	struct gma_encoder *encoder;
 };
 
 struct psb_intel_crtc_state {
@@ -161,7 +162,7 @@
 	uint32_t savePalette[256];
 };
 
-struct psb_intel_crtc {
+struct gma_crtc {
 	struct drm_crtc base;
 	int pipe;
 	int plane;
@@ -188,14 +189,16 @@
 
 	/* Saved Crtc HW states */
 	struct psb_intel_crtc_state *crtc_state;
+
+	const struct gma_clock_funcs *clock_funcs;
 };
 
-#define to_psb_intel_crtc(x)	\
-		container_of(x, struct psb_intel_crtc, base)
-#define to_psb_intel_connector(x) \
-		container_of(x, struct psb_intel_connector, base)
-#define to_psb_intel_encoder(x)	\
-		container_of(x, struct psb_intel_encoder, base)
+#define to_gma_crtc(x)	\
+		container_of(x, struct gma_crtc, base)
+#define to_gma_connector(x) \
+		container_of(x, struct gma_connector, base)
+#define to_gma_encoder(x)	\
+		container_of(x, struct gma_encoder, base)
 #define to_psb_intel_framebuffer(x)	\
 		container_of(x, struct psb_intel_framebuffer, base)
 
@@ -223,27 +226,18 @@
 extern void mid_dsi_init(struct drm_device *dev,
 		    struct psb_intel_mode_device *mode_dev, int dsi_num);
 
-extern void psb_intel_crtc_load_lut(struct drm_crtc *crtc);
-extern void psb_intel_encoder_prepare(struct drm_encoder *encoder);
-extern void psb_intel_encoder_commit(struct drm_encoder *encoder);
-extern void psb_intel_encoder_destroy(struct drm_encoder *encoder);
+extern struct drm_encoder *gma_best_encoder(struct drm_connector *connector);
+extern void gma_connector_attach_encoder(struct gma_connector *connector,
+					 struct gma_encoder *encoder);
 
-static inline struct psb_intel_encoder *psb_intel_attached_encoder(
+static inline struct gma_encoder *gma_attached_encoder(
 						struct drm_connector *connector)
 {
-	return to_psb_intel_connector(connector)->encoder;
+	return to_gma_connector(connector)->encoder;
 }
 
-extern void psb_intel_connector_attach_encoder(
-					struct psb_intel_connector *connector,
-					struct psb_intel_encoder *encoder);
-
-extern struct drm_encoder *psb_intel_best_encoder(struct drm_connector
-					      *connector);
-
 extern struct drm_display_mode *psb_intel_crtc_mode_get(struct drm_device *dev,
 						    struct drm_crtc *crtc);
-extern void psb_intel_wait_for_vblank(struct drm_device *dev);
 extern int psb_intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 extern struct drm_crtc *psb_intel_get_crtc_from_pipe(struct drm_device *dev,
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index 9fa5fa2..32342f6 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -267,10 +267,9 @@
 	struct drm_device *dev = connector->dev;
 	struct drm_psb_private *dev_priv =
 		(struct drm_psb_private *)dev->dev_private;
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 	struct psb_intel_lvds_priv *lvds_priv =
-		(struct psb_intel_lvds_priv *)psb_intel_encoder->dev_priv;
+		(struct psb_intel_lvds_priv *)gma_encoder->dev_priv;
 
 	lvds_priv->savePP_ON = REG_READ(LVDSPP_ON);
 	lvds_priv->savePP_OFF = REG_READ(LVDSPP_OFF);
@@ -307,10 +306,9 @@
 {
 	struct drm_device *dev = connector->dev;
 	u32 pp_status;
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 	struct psb_intel_lvds_priv *lvds_priv =
-		(struct psb_intel_lvds_priv *)psb_intel_encoder->dev_priv;
+		(struct psb_intel_lvds_priv *)gma_encoder->dev_priv;
 
 	dev_dbg(dev->dev, "(0x%x, 0x%x, 0x%x, 0x%x, 0x%x, 0x%x)\n",
 			lvds_priv->savePP_ON,
@@ -349,12 +347,11 @@
 				 struct drm_display_mode *mode)
 {
 	struct drm_psb_private *dev_priv = connector->dev->dev_private;
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 	struct drm_display_mode *fixed_mode =
 					dev_priv->mode_dev.panel_fixed_mode;
 
-	if (psb_intel_encoder->type == INTEL_OUTPUT_MIPI2)
+	if (gma_encoder->type == INTEL_OUTPUT_MIPI2)
 		fixed_mode = dev_priv->mode_dev.panel_fixed_mode2;
 
 	/* just in case */
@@ -381,22 +378,20 @@
 	struct drm_device *dev = encoder->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct psb_intel_mode_device *mode_dev = &dev_priv->mode_dev;
-	struct psb_intel_crtc *psb_intel_crtc =
-				to_psb_intel_crtc(encoder->crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(encoder->crtc);
 	struct drm_encoder *tmp_encoder;
 	struct drm_display_mode *panel_fixed_mode = mode_dev->panel_fixed_mode;
-	struct psb_intel_encoder *psb_intel_encoder =
-						to_psb_intel_encoder(encoder);
+	struct gma_encoder *gma_encoder = to_gma_encoder(encoder);
 
-	if (psb_intel_encoder->type == INTEL_OUTPUT_MIPI2)
+	if (gma_encoder->type == INTEL_OUTPUT_MIPI2)
 		panel_fixed_mode = mode_dev->panel_fixed_mode2;
 
 	/* PSB requires the LVDS is on pipe B, MRST has only one pipe anyway */
-	if (!IS_MRST(dev) && psb_intel_crtc->pipe == 0) {
+	if (!IS_MRST(dev) && gma_crtc->pipe == 0) {
 		printk(KERN_ERR "Can't support LVDS on pipe A\n");
 		return false;
 	}
-	if (IS_MRST(dev) && psb_intel_crtc->pipe != 0) {
+	if (IS_MRST(dev) && gma_crtc->pipe != 0) {
 		printk(KERN_ERR "Must use PIPE A\n");
 		return false;
 	}
@@ -525,9 +520,8 @@
 	struct drm_device *dev = connector->dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct psb_intel_mode_device *mode_dev = &dev_priv->mode_dev;
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-	struct psb_intel_lvds_priv *lvds_priv = psb_intel_encoder->dev_priv;
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+	struct psb_intel_lvds_priv *lvds_priv = gma_encoder->dev_priv;
 	int ret = 0;
 
 	if (!IS_MRST(dev))
@@ -564,9 +558,8 @@
  */
 void psb_intel_lvds_destroy(struct drm_connector *connector)
 {
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-	struct psb_intel_lvds_priv *lvds_priv = psb_intel_encoder->dev_priv;
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+	struct psb_intel_lvds_priv *lvds_priv = gma_encoder->dev_priv;
 
 	if (lvds_priv->ddc_bus)
 		psb_intel_i2c_destroy(lvds_priv->ddc_bus);
@@ -585,8 +578,7 @@
 		return -1;
 
 	if (!strcmp(property->name, "scaling mode")) {
-		struct psb_intel_crtc *crtc =
-					to_psb_intel_crtc(encoder->crtc);
+		struct gma_crtc *crtc = to_gma_crtc(encoder->crtc);
 		uint64_t curval;
 
 		if (!crtc)
@@ -656,7 +648,7 @@
 				psb_intel_lvds_connector_helper_funcs = {
 	.get_modes = psb_intel_lvds_get_modes,
 	.mode_valid = psb_intel_lvds_mode_valid,
-	.best_encoder = psb_intel_best_encoder,
+	.best_encoder = gma_best_encoder,
 };
 
 const struct drm_connector_funcs psb_intel_lvds_connector_funcs = {
@@ -691,8 +683,8 @@
 void psb_intel_lvds_init(struct drm_device *dev,
 			 struct psb_intel_mode_device *mode_dev)
 {
-	struct psb_intel_encoder *psb_intel_encoder;
-	struct psb_intel_connector *psb_intel_connector;
+	struct gma_encoder *gma_encoder;
+	struct gma_connector *gma_connector;
 	struct psb_intel_lvds_priv *lvds_priv;
 	struct drm_connector *connector;
 	struct drm_encoder *encoder;
@@ -702,17 +694,15 @@
 	u32 lvds;
 	int pipe;
 
-	psb_intel_encoder =
-			kzalloc(sizeof(struct psb_intel_encoder), GFP_KERNEL);
-	if (!psb_intel_encoder) {
-		dev_err(dev->dev, "psb_intel_encoder allocation error\n");
+	gma_encoder = kzalloc(sizeof(struct gma_encoder), GFP_KERNEL);
+	if (!gma_encoder) {
+		dev_err(dev->dev, "gma_encoder allocation error\n");
 		return;
 	}
 
-	psb_intel_connector =
-		kzalloc(sizeof(struct psb_intel_connector), GFP_KERNEL);
-	if (!psb_intel_connector) {
-		dev_err(dev->dev, "psb_intel_connector allocation error\n");
+	gma_connector = kzalloc(sizeof(struct gma_connector), GFP_KERNEL);
+	if (!gma_connector) {
+		dev_err(dev->dev, "gma_connector allocation error\n");
 		goto failed_encoder;
 	}
 
@@ -722,10 +712,10 @@
 		goto failed_connector;
 	}
 
-	psb_intel_encoder->dev_priv = lvds_priv;
+	gma_encoder->dev_priv = lvds_priv;
 
-	connector = &psb_intel_connector->base;
-	encoder = &psb_intel_encoder->base;
+	connector = &gma_connector->base;
+	encoder = &gma_encoder->base;
 	drm_connector_init(dev, connector,
 			   &psb_intel_lvds_connector_funcs,
 			   DRM_MODE_CONNECTOR_LVDS);
@@ -734,9 +724,8 @@
 			 &psb_intel_lvds_enc_funcs,
 			 DRM_MODE_ENCODER_LVDS);
 
-	psb_intel_connector_attach_encoder(psb_intel_connector,
-					   psb_intel_encoder);
-	psb_intel_encoder->type = INTEL_OUTPUT_LVDS;
+	gma_connector_attach_encoder(gma_connector, gma_encoder);
+	gma_encoder->type = INTEL_OUTPUT_LVDS;
 
 	drm_encoder_helper_add(encoder, &psb_intel_lvds_helper_funcs);
 	drm_connector_helper_add(connector,
@@ -851,8 +840,8 @@
 	drm_encoder_cleanup(encoder);
 	drm_connector_cleanup(connector);
 failed_connector:
-	kfree(psb_intel_connector);
+	kfree(gma_connector);
 failed_encoder:
-	kfree(psb_intel_encoder);
+	kfree(gma_encoder);
 }
 
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index 19e3660..6f01cdf 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -65,7 +65,7 @@
 #define TV_FORMAT_NUM  (sizeof(tv_format_names) / sizeof(*tv_format_names))
 
 struct psb_intel_sdvo {
-	struct psb_intel_encoder base;
+	struct gma_encoder base;
 
 	struct i2c_adapter *i2c;
 	u8 slave_addr;
@@ -140,7 +140,7 @@
 };
 
 struct psb_intel_sdvo_connector {
-	struct psb_intel_connector base;
+	struct gma_connector base;
 
 	/* Mark the type of connector */
 	uint16_t output_flag;
@@ -200,13 +200,13 @@
 
 static struct psb_intel_sdvo *intel_attached_sdvo(struct drm_connector *connector)
 {
-	return container_of(psb_intel_attached_encoder(connector),
+	return container_of(gma_attached_encoder(connector),
 			    struct psb_intel_sdvo, base);
 }
 
 static struct psb_intel_sdvo_connector *to_psb_intel_sdvo_connector(struct drm_connector *connector)
 {
-	return container_of(to_psb_intel_connector(connector), struct psb_intel_sdvo_connector, base);
+	return container_of(to_gma_connector(connector), struct psb_intel_sdvo_connector, base);
 }
 
 static bool
@@ -500,7 +500,8 @@
 				  &status))
 		goto log_fail;
 
-	while (status == SDVO_CMD_STATUS_PENDING && retry--) {
+	while ((status == SDVO_CMD_STATUS_PENDING ||
+		status == SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED) && retry--) {
 		udelay(15);
 		if (!psb_intel_sdvo_read_byte(psb_intel_sdvo,
 					  SDVO_I2C_CMD_STATUS,
@@ -987,7 +988,7 @@
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_crtc *crtc = encoder->crtc;
-	struct psb_intel_crtc *psb_intel_crtc = to_psb_intel_crtc(crtc);
+	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	struct psb_intel_sdvo *psb_intel_sdvo = to_psb_intel_sdvo(encoder);
 	u32 sdvox;
 	struct psb_intel_sdvo_in_out_map in_out;
@@ -1070,7 +1071,7 @@
 	}
 	sdvox |= (9 << 19) | SDVO_BORDER_ENABLE;
 
-	if (psb_intel_crtc->pipe == 1)
+	if (gma_crtc->pipe == 1)
 		sdvox |= SDVO_PIPE_B_SELECT;
 	if (psb_intel_sdvo->has_hdmi_audio)
 		sdvox |= SDVO_AUDIO_ENABLE;
@@ -1121,7 +1122,7 @@
 		if ((temp & SDVO_ENABLE) == 0)
 			psb_intel_sdvo_write_sdvox(psb_intel_sdvo, temp | SDVO_ENABLE);
 		for (i = 0; i < 2; i++)
-			psb_intel_wait_for_vblank(dev);
+			gma_wait_for_vblank(dev);
 
 		status = psb_intel_sdvo_get_trained_inputs(psb_intel_sdvo, &input1, &input2);
 		/* Warn if the device reported failure to sync.
@@ -1836,10 +1837,8 @@
 static void psb_intel_sdvo_save(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct psb_intel_encoder *psb_intel_encoder =
-					psb_intel_attached_encoder(connector);
-	struct psb_intel_sdvo *sdvo =
-				to_psb_intel_sdvo(&psb_intel_encoder->base);
+	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
+	struct psb_intel_sdvo *sdvo = to_psb_intel_sdvo(&gma_encoder->base);
 
 	sdvo->saveSDVO = REG_READ(sdvo->sdvo_reg);
 }
@@ -1847,8 +1846,7 @@
 static void psb_intel_sdvo_restore(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
-	struct drm_encoder *encoder =
-				&psb_intel_attached_encoder(connector)->base;
+	struct drm_encoder *encoder = &gma_attached_encoder(connector)->base;
 	struct psb_intel_sdvo *sdvo = to_psb_intel_sdvo(encoder);
 	struct drm_crtc *crtc = encoder->crtc;
 
@@ -1864,9 +1862,9 @@
 static const struct drm_encoder_helper_funcs psb_intel_sdvo_helper_funcs = {
 	.dpms = psb_intel_sdvo_dpms,
 	.mode_fixup = psb_intel_sdvo_mode_fixup,
-	.prepare = psb_intel_encoder_prepare,
+	.prepare = gma_encoder_prepare,
 	.mode_set = psb_intel_sdvo_mode_set,
-	.commit = psb_intel_encoder_commit,
+	.commit = gma_encoder_commit,
 };
 
 static const struct drm_connector_funcs psb_intel_sdvo_connector_funcs = {
@@ -1882,7 +1880,7 @@
 static const struct drm_connector_helper_funcs psb_intel_sdvo_connector_helper_funcs = {
 	.get_modes = psb_intel_sdvo_get_modes,
 	.mode_valid = psb_intel_sdvo_mode_valid,
-	.best_encoder = psb_intel_best_encoder,
+	.best_encoder = gma_best_encoder,
 };
 
 static void psb_intel_sdvo_enc_destroy(struct drm_encoder *encoder)
@@ -1894,7 +1892,7 @@
 				 psb_intel_sdvo->sdvo_lvds_fixed_mode);
 
 	i2c_del_adapter(&psb_intel_sdvo->ddc);
-	psb_intel_encoder_destroy(encoder);
+	gma_encoder_destroy(encoder);
 }
 
 static const struct drm_encoder_funcs psb_intel_sdvo_enc_funcs = {
@@ -2055,7 +2053,7 @@
 	connector->base.base.doublescan_allowed = 0;
 	connector->base.base.display_info.subpixel_order = SubPixelHorizontalRGB;
 
-	psb_intel_connector_attach_encoder(&connector->base, &encoder->base);
+	gma_connector_attach_encoder(&connector->base, &encoder->base);
 	drm_sysfs_connector_add(&connector->base.base);
 }
 
@@ -2075,7 +2073,7 @@
 {
 	struct drm_encoder *encoder = &psb_intel_sdvo->base.base;
 	struct drm_connector *connector;
-	struct psb_intel_connector *intel_connector;
+	struct gma_connector *intel_connector;
 	struct psb_intel_sdvo_connector *psb_intel_sdvo_connector;
 
 	psb_intel_sdvo_connector = kzalloc(sizeof(struct psb_intel_sdvo_connector), GFP_KERNEL);
@@ -2115,7 +2113,7 @@
 {
 	struct drm_encoder *encoder = &psb_intel_sdvo->base.base;
 	struct drm_connector *connector;
-	struct psb_intel_connector *intel_connector;
+	struct gma_connector *intel_connector;
 	struct psb_intel_sdvo_connector *psb_intel_sdvo_connector;
 
 	psb_intel_sdvo_connector = kzalloc(sizeof(struct psb_intel_sdvo_connector), GFP_KERNEL);
@@ -2154,7 +2152,7 @@
 {
 	struct drm_encoder *encoder = &psb_intel_sdvo->base.base;
 	struct drm_connector *connector;
-	struct psb_intel_connector *intel_connector;
+	struct gma_connector *intel_connector;
 	struct psb_intel_sdvo_connector *psb_intel_sdvo_connector;
 
 	psb_intel_sdvo_connector = kzalloc(sizeof(struct psb_intel_sdvo_connector), GFP_KERNEL);
@@ -2188,7 +2186,7 @@
 {
 	struct drm_encoder *encoder = &psb_intel_sdvo->base.base;
 	struct drm_connector *connector;
-	struct psb_intel_connector *intel_connector;
+	struct gma_connector *intel_connector;
 	struct psb_intel_sdvo_connector *psb_intel_sdvo_connector;
 
 	psb_intel_sdvo_connector = kzalloc(sizeof(struct psb_intel_sdvo_connector), GFP_KERNEL);
@@ -2540,7 +2538,7 @@
 bool psb_intel_sdvo_init(struct drm_device *dev, int sdvo_reg)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
-	struct psb_intel_encoder *psb_intel_encoder;
+	struct gma_encoder *gma_encoder;
 	struct psb_intel_sdvo *psb_intel_sdvo;
 	int i;
 
@@ -2557,9 +2555,9 @@
 	}
 
 	/* encoder type will be decided later */
-	psb_intel_encoder = &psb_intel_sdvo->base;
-	psb_intel_encoder->type = INTEL_OUTPUT_SDVO;
-	drm_encoder_init(dev, &psb_intel_encoder->base, &psb_intel_sdvo_enc_funcs, 0);
+	gma_encoder = &psb_intel_sdvo->base;
+	gma_encoder->type = INTEL_OUTPUT_SDVO;
+	drm_encoder_init(dev, &gma_encoder->base, &psb_intel_sdvo_enc_funcs, 0);
 
 	/* Read the regs to test if we can talk to the device */
 	for (i = 0; i < 0x40; i++) {
@@ -2577,7 +2575,7 @@
 	else
 		dev_priv->hotplug_supported_mask |= SDVOC_HOTPLUG_INT_STATUS;
 
-	drm_encoder_helper_add(&psb_intel_encoder->base, &psb_intel_sdvo_helper_funcs);
+	drm_encoder_helper_add(&gma_encoder->base, &psb_intel_sdvo_helper_funcs);
 
 	/* In default case sdvo lvds is false */
 	if (!psb_intel_sdvo_get_capabilities(psb_intel_sdvo, &psb_intel_sdvo->caps))
@@ -2620,7 +2618,7 @@
 	return true;
 
 err:
-	drm_encoder_cleanup(&psb_intel_encoder->base);
+	drm_encoder_cleanup(&gma_encoder->base);
 	i2c_del_adapter(&psb_intel_sdvo->ddc);
 	kfree(psb_intel_sdvo);
 
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index e68b58a..c2bd711 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -23,7 +23,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_encoder_slave.h>
 #include <drm/drm_edid.h>
-
+#include <drm/i2c/tda998x.h>
 
 #define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
 
@@ -32,6 +32,11 @@
 	uint16_t rev;
 	uint8_t current_page;
 	int dpms;
+	bool is_hdmi_sink;
+	u8 vip_cntrl_0;
+	u8 vip_cntrl_1;
+	u8 vip_cntrl_2;
+	struct tda998x_encoder_params params;
 };
 
 #define to_tda998x_priv(x)  ((struct tda998x_priv *)to_encoder_slave(x)->slave_priv)
@@ -68,10 +73,13 @@
 # define I2C_MASTER_DIS_MM        (1 << 0)
 # define I2C_MASTER_DIS_FILT      (1 << 1)
 # define I2C_MASTER_APP_STRT_LAT  (1 << 2)
+#define REG_FEAT_POWERDOWN        REG(0x00, 0x0e)     /* read/write */
+# define FEAT_POWERDOWN_SPDIF     (1 << 3)
 #define REG_INT_FLAGS_0           REG(0x00, 0x0f)     /* read/write */
 #define REG_INT_FLAGS_1           REG(0x00, 0x10)     /* read/write */
 #define REG_INT_FLAGS_2           REG(0x00, 0x11)     /* read/write */
 # define INT_FLAGS_2_EDID_BLK_RD  (1 << 1)
+#define REG_ENA_ACLK              REG(0x00, 0x16)     /* read/write */
 #define REG_ENA_VP_0              REG(0x00, 0x18)     /* read/write */
 #define REG_ENA_VP_1              REG(0x00, 0x19)     /* read/write */
 #define REG_ENA_VP_2              REG(0x00, 0x1a)     /* read/write */
@@ -110,6 +118,8 @@
 #define REG_VIP_CNTRL_5           REG(0x00, 0x25)     /* write */
 # define VIP_CNTRL_5_CKCASE       (1 << 0)
 # define VIP_CNTRL_5_SP_CNT(x)    (((x) & 3) << 1)
+#define REG_MUX_AP                REG(0x00, 0x26)     /* read/write */
+#define REG_MUX_VP_VIP_OUT        REG(0x00, 0x27)     /* read/write */
 #define REG_MAT_CONTRL            REG(0x00, 0x80)     /* write */
 # define MAT_CONTRL_MAT_SC(x)     (((x) & 3) << 0)
 # define MAT_CONTRL_MAT_BP        (1 << 2)
@@ -130,8 +140,12 @@
 #define REG_VS_LINE_END_1_LSB     REG(0x00, 0xae)     /* write */
 #define REG_VS_PIX_END_1_MSB      REG(0x00, 0xaf)     /* write */
 #define REG_VS_PIX_END_1_LSB      REG(0x00, 0xb0)     /* write */
+#define REG_VS_LINE_STRT_2_MSB    REG(0x00, 0xb1)     /* write */
+#define REG_VS_LINE_STRT_2_LSB    REG(0x00, 0xb2)     /* write */
 #define REG_VS_PIX_STRT_2_MSB     REG(0x00, 0xb3)     /* write */
 #define REG_VS_PIX_STRT_2_LSB     REG(0x00, 0xb4)     /* write */
+#define REG_VS_LINE_END_2_MSB     REG(0x00, 0xb5)     /* write */
+#define REG_VS_LINE_END_2_LSB     REG(0x00, 0xb6)     /* write */
 #define REG_VS_PIX_END_2_MSB      REG(0x00, 0xb7)     /* write */
 #define REG_VS_PIX_END_2_LSB      REG(0x00, 0xb8)     /* write */
 #define REG_HS_PIX_START_MSB      REG(0x00, 0xb9)     /* write */
@@ -142,21 +156,29 @@
 #define REG_VWIN_START_1_LSB      REG(0x00, 0xbe)     /* write */
 #define REG_VWIN_END_1_MSB        REG(0x00, 0xbf)     /* write */
 #define REG_VWIN_END_1_LSB        REG(0x00, 0xc0)     /* write */
+#define REG_VWIN_START_2_MSB      REG(0x00, 0xc1)     /* write */
+#define REG_VWIN_START_2_LSB      REG(0x00, 0xc2)     /* write */
+#define REG_VWIN_END_2_MSB        REG(0x00, 0xc3)     /* write */
+#define REG_VWIN_END_2_LSB        REG(0x00, 0xc4)     /* write */
 #define REG_DE_START_MSB          REG(0x00, 0xc5)     /* write */
 #define REG_DE_START_LSB          REG(0x00, 0xc6)     /* write */
 #define REG_DE_STOP_MSB           REG(0x00, 0xc7)     /* write */
 #define REG_DE_STOP_LSB           REG(0x00, 0xc8)     /* write */
 #define REG_TBG_CNTRL_0           REG(0x00, 0xca)     /* write */
+# define TBG_CNTRL_0_TOP_TGL      (1 << 0)
+# define TBG_CNTRL_0_TOP_SEL      (1 << 1)
+# define TBG_CNTRL_0_DE_EXT       (1 << 2)
+# define TBG_CNTRL_0_TOP_EXT      (1 << 3)
 # define TBG_CNTRL_0_FRAME_DIS    (1 << 5)
 # define TBG_CNTRL_0_SYNC_MTHD    (1 << 6)
 # define TBG_CNTRL_0_SYNC_ONCE    (1 << 7)
 #define REG_TBG_CNTRL_1           REG(0x00, 0xcb)     /* write */
-# define TBG_CNTRL_1_VH_TGL_0     (1 << 0)
-# define TBG_CNTRL_1_VH_TGL_1     (1 << 1)
-# define TBG_CNTRL_1_VH_TGL_2     (1 << 2)
-# define TBG_CNTRL_1_VHX_EXT_DE   (1 << 3)
-# define TBG_CNTRL_1_VHX_EXT_HS   (1 << 4)
-# define TBG_CNTRL_1_VHX_EXT_VS   (1 << 5)
+# define TBG_CNTRL_1_H_TGL        (1 << 0)
+# define TBG_CNTRL_1_V_TGL        (1 << 1)
+# define TBG_CNTRL_1_TGL_EN       (1 << 2)
+# define TBG_CNTRL_1_X_EXT        (1 << 3)
+# define TBG_CNTRL_1_H_EXT        (1 << 4)
+# define TBG_CNTRL_1_V_EXT        (1 << 5)
 # define TBG_CNTRL_1_DWIN_DIS     (1 << 6)
 #define REG_ENABLE_SPACE          REG(0x00, 0xd6)     /* write */
 #define REG_HVF_CNTRL_0           REG(0x00, 0xe4)     /* write */
@@ -171,6 +193,12 @@
 # define HVF_CNTRL_1_PAD(x)       (((x) & 3) << 4)
 # define HVF_CNTRL_1_SEMI_PLANAR  (1 << 6)
 #define REG_RPT_CNTRL             REG(0x00, 0xf0)     /* write */
+#define REG_I2S_FORMAT            REG(0x00, 0xfc)     /* read/write */
+# define I2S_FORMAT(x)            (((x) & 3) << 0)
+#define REG_AIP_CLKSEL            REG(0x00, 0xfd)     /* write */
+# define AIP_CLKSEL_FS(x)         (((x) & 3) << 0)
+# define AIP_CLKSEL_CLK_POL(x)    (((x) & 1) << 2)
+# define AIP_CLKSEL_AIP(x)        (((x) & 7) << 3)
 
 
 /* Page 02h: PLL settings */
@@ -194,6 +222,12 @@
 #define REG_PLL_SCGR1             REG(0x02, 0x09)     /* read/write */
 #define REG_PLL_SCGR2             REG(0x02, 0x0a)     /* read/write */
 #define REG_AUDIO_DIV             REG(0x02, 0x0e)     /* read/write */
+# define AUDIO_DIV_SERCLK_1       0
+# define AUDIO_DIV_SERCLK_2       1
+# define AUDIO_DIV_SERCLK_4       2
+# define AUDIO_DIV_SERCLK_8       3
+# define AUDIO_DIV_SERCLK_16      4
+# define AUDIO_DIV_SERCLK_32      5
 #define REG_SEL_CLK               REG(0x02, 0x11)     /* read/write */
 # define SEL_CLK_SEL_CLK1         (1 << 0)
 # define SEL_CLK_SEL_VRF_CLK(x)   (((x) & 3) << 1)
@@ -212,6 +246,11 @@
 
 
 /* Page 10h: information frames and packets */
+#define REG_IF1_HB0               REG(0x10, 0x20)     /* read/write */
+#define REG_IF2_HB0               REG(0x10, 0x40)     /* read/write */
+#define REG_IF3_HB0               REG(0x10, 0x60)     /* read/write */
+#define REG_IF4_HB0               REG(0x10, 0x80)     /* read/write */
+#define REG_IF5_HB0               REG(0x10, 0xa0)     /* read/write */
 
 
 /* Page 11h: audio settings and content info packets */
@@ -221,14 +260,39 @@
 # define AIP_CNTRL_0_LAYOUT       (1 << 2)
 # define AIP_CNTRL_0_ACR_MAN      (1 << 5)
 # define AIP_CNTRL_0_RST_CTS      (1 << 6)
+#define REG_CA_I2S                REG(0x11, 0x01)     /* read/write */
+# define CA_I2S_CA_I2S(x)         (((x) & 31) << 0)
+# define CA_I2S_HBR_CHSTAT        (1 << 6)
+#define REG_LATENCY_RD            REG(0x11, 0x04)     /* read/write */
+#define REG_ACR_CTS_0             REG(0x11, 0x05)     /* read/write */
+#define REG_ACR_CTS_1             REG(0x11, 0x06)     /* read/write */
+#define REG_ACR_CTS_2             REG(0x11, 0x07)     /* read/write */
+#define REG_ACR_N_0               REG(0x11, 0x08)     /* read/write */
+#define REG_ACR_N_1               REG(0x11, 0x09)     /* read/write */
+#define REG_ACR_N_2               REG(0x11, 0x0a)     /* read/write */
+#define REG_CTS_N                 REG(0x11, 0x0c)     /* read/write */
+# define CTS_N_K(x)               (((x) & 7) << 0)
+# define CTS_N_M(x)               (((x) & 3) << 4)
 #define REG_ENC_CNTRL             REG(0x11, 0x0d)     /* read/write */
 # define ENC_CNTRL_RST_ENC        (1 << 0)
 # define ENC_CNTRL_RST_SEL        (1 << 1)
 # define ENC_CNTRL_CTL_CODE(x)    (((x) & 3) << 2)
+#define REG_DIP_FLAGS             REG(0x11, 0x0e)     /* read/write */
+# define DIP_FLAGS_ACR            (1 << 0)
+# define DIP_FLAGS_GC             (1 << 1)
+#define REG_DIP_IF_FLAGS          REG(0x11, 0x0f)     /* read/write */
+# define DIP_IF_FLAGS_IF1         (1 << 1)
+# define DIP_IF_FLAGS_IF2         (1 << 2)
+# define DIP_IF_FLAGS_IF3         (1 << 3)
+# define DIP_IF_FLAGS_IF4         (1 << 4)
+# define DIP_IF_FLAGS_IF5         (1 << 5)
+#define REG_CH_STAT_B(x)          REG(0x11, 0x14 + (x)) /* read/write */
 
 
 /* Page 12h: HDCP and OTP */
 #define REG_TX3                   REG(0x12, 0x9a)     /* read/write */
+#define REG_TX4                   REG(0x12, 0x9b)     /* read/write */
+# define TX4_PD_RAM               (1 << 1)
 #define REG_TX33                  REG(0x12, 0xb8)     /* read/write */
 # define TX33_HDMI                (1 << 1)
 
@@ -338,6 +402,23 @@
 	return ret;
 }
 
+static void
+reg_write_range(struct drm_encoder *encoder, uint16_t reg, uint8_t *p, int cnt)
+{
+	struct i2c_client *client = drm_i2c_encoder_get_client(encoder);
+	uint8_t buf[cnt+1];
+	int ret;
+
+	buf[0] = REG2ADDR(reg);
+	memcpy(&buf[1], p, cnt);
+
+	set_page(encoder, reg);
+
+	ret = i2c_master_send(client, buf, cnt + 1);
+	if (ret < 0)
+		dev_err(&client->dev, "Error %d writing to 0x%x\n", ret, reg);
+}
+
 static uint8_t
 reg_read(struct drm_encoder *encoder, uint16_t reg)
 {
@@ -406,13 +487,172 @@
 	reg_write(encoder, REG_SERIALIZER,   0x00);
 	reg_write(encoder, REG_BUFFER_OUT,   0x00);
 	reg_write(encoder, REG_PLL_SCG1,     0x00);
-	reg_write(encoder, REG_AUDIO_DIV,    0x03);
+	reg_write(encoder, REG_AUDIO_DIV,    AUDIO_DIV_SERCLK_8);
 	reg_write(encoder, REG_SEL_CLK,      SEL_CLK_SEL_CLK1 | SEL_CLK_ENA_SC_CLK);
 	reg_write(encoder, REG_PLL_SCGN1,    0xfa);
 	reg_write(encoder, REG_PLL_SCGN2,    0x00);
 	reg_write(encoder, REG_PLL_SCGR1,    0x5b);
 	reg_write(encoder, REG_PLL_SCGR2,    0x00);
 	reg_write(encoder, REG_PLL_SCG2,     0x10);
+
+	/* Write the default value MUX register */
+	reg_write(encoder, REG_MUX_VP_VIP_OUT, 0x24);
+}
+
+static uint8_t tda998x_cksum(uint8_t *buf, size_t bytes)
+{
+	uint8_t sum = 0;
+
+	while (bytes--)
+		sum += *buf++;
+	return (255 - sum) + 1;
+}
+
+#define HB(x) (x)
+#define PB(x) (HB(2) + 1 + (x))
+
+static void
+tda998x_write_if(struct drm_encoder *encoder, uint8_t bit, uint16_t addr,
+		 uint8_t *buf, size_t size)
+{
+	buf[PB(0)] = tda998x_cksum(buf, size);
+
+	reg_clear(encoder, REG_DIP_IF_FLAGS, bit);
+	reg_write_range(encoder, addr, buf, size);
+	reg_set(encoder, REG_DIP_IF_FLAGS, bit);
+}
+
+static void
+tda998x_write_aif(struct drm_encoder *encoder, struct tda998x_encoder_params *p)
+{
+	uint8_t buf[PB(5) + 1];
+
+	buf[HB(0)] = 0x84;
+	buf[HB(1)] = 0x01;
+	buf[HB(2)] = 10;
+	buf[PB(0)] = 0;
+	buf[PB(1)] = p->audio_frame[1] & 0x07; /* CC */
+	buf[PB(2)] = p->audio_frame[2] & 0x1c; /* SF */
+	buf[PB(4)] = p->audio_frame[4];
+	buf[PB(5)] = p->audio_frame[5] & 0xf8; /* DM_INH + LSV */
+
+	tda998x_write_if(encoder, DIP_IF_FLAGS_IF4, REG_IF4_HB0, buf,
+			 sizeof(buf));
+}
+
+static void
+tda998x_write_avi(struct drm_encoder *encoder, struct drm_display_mode *mode)
+{
+	uint8_t buf[PB(13) + 1];
+
+	memset(buf, 0, sizeof(buf));
+	buf[HB(0)] = 0x82;
+	buf[HB(1)] = 0x02;
+	buf[HB(2)] = 13;
+	buf[PB(4)] = drm_match_cea_mode(mode);
+
+	tda998x_write_if(encoder, DIP_IF_FLAGS_IF2, REG_IF2_HB0, buf,
+			 sizeof(buf));
+}
+
+static void tda998x_audio_mute(struct drm_encoder *encoder, bool on)
+{
+	if (on) {
+		reg_set(encoder, REG_SOFTRESET, SOFTRESET_AUDIO);
+		reg_clear(encoder, REG_SOFTRESET, SOFTRESET_AUDIO);
+		reg_set(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO);
+	} else {
+		reg_clear(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO);
+	}
+}
+
+static void
+tda998x_configure_audio(struct drm_encoder *encoder,
+		struct drm_display_mode *mode, struct tda998x_encoder_params *p)
+{
+	uint8_t buf[6], clksel_aip, clksel_fs, ca_i2s, cts_n, adiv;
+	uint32_t n;
+
+	/* Enable audio ports */
+	reg_write(encoder, REG_ENA_AP, p->audio_cfg);
+	reg_write(encoder, REG_ENA_ACLK, p->audio_clk_cfg);
+
+	/* Set audio input source */
+	switch (p->audio_format) {
+	case AFMT_SPDIF:
+		reg_write(encoder, REG_MUX_AP, 0x40);
+		clksel_aip = AIP_CLKSEL_AIP(0);
+		/* FS64SPDIF */
+		clksel_fs = AIP_CLKSEL_FS(2);
+		cts_n = CTS_N_M(3) | CTS_N_K(3);
+		ca_i2s = 0;
+		break;
+
+	case AFMT_I2S:
+		reg_write(encoder, REG_MUX_AP, 0x64);
+		clksel_aip = AIP_CLKSEL_AIP(1);
+		/* ACLK */
+		clksel_fs = AIP_CLKSEL_FS(0);
+		cts_n = CTS_N_M(3) | CTS_N_K(3);
+		ca_i2s = CA_I2S_CA_I2S(0);
+		break;
+	}
+
+	reg_write(encoder, REG_AIP_CLKSEL, clksel_aip);
+	reg_clear(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_LAYOUT);
+
+	/* Enable automatic CTS generation */
+	reg_clear(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_ACR_MAN);
+	reg_write(encoder, REG_CTS_N, cts_n);
+
+	/*
+	 * Audio input somehow depends on HDMI line rate which is
+	 * related to pixclk. Testing showed that modes with pixclk
+	 * >100MHz need a larger divider while <40MHz need the default.
+	 * There is no detailed info in the datasheet, so we just
+	 * assume 100MHz requires larger divider.
+	 */
+	if (mode->clock > 100000)
+		adiv = AUDIO_DIV_SERCLK_16;
+	else
+		adiv = AUDIO_DIV_SERCLK_8;
+	reg_write(encoder, REG_AUDIO_DIV, adiv);
+
+	/*
+	 * This is the approximate value of N, which happens to be
+	 * the recommended values for non-coherent clocks.
+	 */
+	n = 128 * p->audio_sample_rate / 1000;
+
+	/* Write the CTS and N values */
+	buf[0] = 0x44;
+	buf[1] = 0x42;
+	buf[2] = 0x01;
+	buf[3] = n;
+	buf[4] = n >> 8;
+	buf[5] = n >> 16;
+	reg_write_range(encoder, REG_ACR_CTS_0, buf, 6);
+
+	/* Set CTS clock reference */
+	reg_write(encoder, REG_AIP_CLKSEL, clksel_aip | clksel_fs);
+
+	/* Reset CTS generator */
+	reg_set(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_CTS);
+	reg_clear(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_CTS);
+
+	/* Write the channel status */
+	buf[0] = 0x04;
+	buf[1] = 0x00;
+	buf[2] = 0x00;
+	buf[3] = 0xf1;
+	reg_write_range(encoder, REG_CH_STAT_B(0), buf, 4);
+
+	tda998x_audio_mute(encoder, true);
+	mdelay(20);
+	tda998x_audio_mute(encoder, false);
+
+	/* Write the audio information packet */
+	tda998x_write_aif(encoder, p);
 }
 
 /* DRM encoder functions */
@@ -420,6 +660,23 @@
 static void
 tda998x_encoder_set_config(struct drm_encoder *encoder, void *params)
 {
+	struct tda998x_priv *priv = to_tda998x_priv(encoder);
+	struct tda998x_encoder_params *p = params;
+
+	priv->vip_cntrl_0 = VIP_CNTRL_0_SWAP_A(p->swap_a) |
+			    (p->mirr_a ? VIP_CNTRL_0_MIRR_A : 0) |
+			    VIP_CNTRL_0_SWAP_B(p->swap_b) |
+			    (p->mirr_b ? VIP_CNTRL_0_MIRR_B : 0);
+	priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(p->swap_c) |
+			    (p->mirr_c ? VIP_CNTRL_1_MIRR_C : 0) |
+			    VIP_CNTRL_1_SWAP_D(p->swap_d) |
+			    (p->mirr_d ? VIP_CNTRL_1_MIRR_D : 0);
+	priv->vip_cntrl_2 = VIP_CNTRL_2_SWAP_E(p->swap_e) |
+			    (p->mirr_e ? VIP_CNTRL_2_MIRR_E : 0) |
+			    VIP_CNTRL_2_SWAP_F(p->swap_f) |
+			    (p->mirr_f ? VIP_CNTRL_2_MIRR_F : 0);
+
+	priv->params = *p;
 }
 
 static void
@@ -436,18 +693,14 @@
 
 	switch (mode) {
 	case DRM_MODE_DPMS_ON:
-		/* enable audio and video ports */
-		reg_write(encoder, REG_ENA_AP, 0xff);
+		/* enable video ports, audio will be enabled later */
 		reg_write(encoder, REG_ENA_VP_0, 0xff);
 		reg_write(encoder, REG_ENA_VP_1, 0xff);
 		reg_write(encoder, REG_ENA_VP_2, 0xff);
 		/* set muxing after enabling ports: */
-		reg_write(encoder, REG_VIP_CNTRL_0,
-				VIP_CNTRL_0_SWAP_A(2) | VIP_CNTRL_0_SWAP_B(3));
-		reg_write(encoder, REG_VIP_CNTRL_1,
-				VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1));
-		reg_write(encoder, REG_VIP_CNTRL_2,
-				VIP_CNTRL_2_SWAP_E(4) | VIP_CNTRL_2_SWAP_F(5));
+		reg_write(encoder, REG_VIP_CNTRL_0, priv->vip_cntrl_0);
+		reg_write(encoder, REG_VIP_CNTRL_1, priv->vip_cntrl_1);
+		reg_write(encoder, REG_VIP_CNTRL_2, priv->vip_cntrl_2);
 		break;
 	case DRM_MODE_DPMS_OFF:
 		/* disable audio and video ports */
@@ -494,43 +747,78 @@
 			struct drm_display_mode *adjusted_mode)
 {
 	struct tda998x_priv *priv = to_tda998x_priv(encoder);
-	uint16_t hs_start, hs_end, line_start, line_end;
-	uint16_t vwin_start, vwin_end, de_start, de_end;
-	uint16_t ref_pix, ref_line, pix_start2;
+	uint16_t ref_pix, ref_line, n_pix, n_line;
+	uint16_t hs_pix_s, hs_pix_e;
+	uint16_t vs1_pix_s, vs1_pix_e, vs1_line_s, vs1_line_e;
+	uint16_t vs2_pix_s, vs2_pix_e, vs2_line_s, vs2_line_e;
+	uint16_t vwin1_line_s, vwin1_line_e;
+	uint16_t vwin2_line_s, vwin2_line_e;
+	uint16_t de_pix_s, de_pix_e;
 	uint8_t reg, div, rep;
 
-	hs_start   = mode->hsync_start - mode->hdisplay;
-	hs_end     = mode->hsync_end - mode->hdisplay;
-	line_start = 1;
-	line_end   = 1 + mode->vsync_end - mode->vsync_start;
-	vwin_start = mode->vtotal - mode->vsync_start;
-	vwin_end   = vwin_start + mode->vdisplay;
-	de_start   = mode->htotal - mode->hdisplay;
-	de_end     = mode->htotal;
-
-	pix_start2 = 0;
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-		pix_start2 = (mode->htotal / 2) + hs_start;
-
-	/* TODO how is this value calculated?  It is 2 for all common
-	 * formats in the tables in out of tree nxp driver (assuming
-	 * I've properly deciphered their byzantine table system)
+	/*
+	 * Internally TDA998x is using ITU-R BT.656 style sync but
+	 * we get VESA style sync. TDA998x is using a reference pixel
+	 * relative to ITU to sync to the input frame and for output
+	 * sync generation. Currently, we are using reference detection
+	 * from HS/VS, i.e. REFPIX/REFLINE denote frame start sync point
+	 * which is position of rising VS with coincident rising HS.
+	 *
+	 * Now there is some issues to take care of:
+	 * - HDMI data islands require sync-before-active
+	 * - TDA998x register values must be > 0 to be enabled
+	 * - REFLINE needs an additional offset of +1
+	 * - REFPIX needs an addtional offset of +1 for UYUV and +3 for RGB
+	 *
+	 * So we add +1 to all horizontal and vertical register values,
+	 * plus an additional +3 for REFPIX as we are using RGB input only.
 	 */
-	ref_line = 2;
+	n_pix        = mode->htotal;
+	n_line       = mode->vtotal;
 
-	/* this might changes for other color formats from the CRTC: */
-	ref_pix = 3 + hs_start;
+	hs_pix_e     = mode->hsync_end - mode->hdisplay;
+	hs_pix_s     = mode->hsync_start - mode->hdisplay;
+	de_pix_e     = mode->htotal;
+	de_pix_s     = mode->htotal - mode->hdisplay;
+	ref_pix      = 3 + hs_pix_s;
+
+	/*
+	 * Attached LCD controllers may generate broken sync. Allow
+	 * those to adjust the position of the rising VS edge by adding
+	 * HSKEW to ref_pix.
+	 */
+	if (adjusted_mode->flags & DRM_MODE_FLAG_HSKEW)
+		ref_pix += adjusted_mode->hskew;
+
+	if ((mode->flags & DRM_MODE_FLAG_INTERLACE) == 0) {
+		ref_line     = 1 + mode->vsync_start - mode->vdisplay;
+		vwin1_line_s = mode->vtotal - mode->vdisplay - 1;
+		vwin1_line_e = vwin1_line_s + mode->vdisplay;
+		vs1_pix_s    = vs1_pix_e = hs_pix_s;
+		vs1_line_s   = mode->vsync_start - mode->vdisplay;
+		vs1_line_e   = vs1_line_s +
+			       mode->vsync_end - mode->vsync_start;
+		vwin2_line_s = vwin2_line_e = 0;
+		vs2_pix_s    = vs2_pix_e  = 0;
+		vs2_line_s   = vs2_line_e = 0;
+	} else {
+		ref_line     = 1 + (mode->vsync_start - mode->vdisplay)/2;
+		vwin1_line_s = (mode->vtotal - mode->vdisplay)/2;
+		vwin1_line_e = vwin1_line_s + mode->vdisplay/2;
+		vs1_pix_s    = vs1_pix_e = hs_pix_s;
+		vs1_line_s   = (mode->vsync_start - mode->vdisplay)/2;
+		vs1_line_e   = vs1_line_s +
+			       (mode->vsync_end - mode->vsync_start)/2;
+		vwin2_line_s = vwin1_line_s + mode->vtotal/2;
+		vwin2_line_e = vwin2_line_s + mode->vdisplay/2;
+		vs2_pix_s    = vs2_pix_e = hs_pix_s + mode->htotal/2;
+		vs2_line_s   = vs1_line_s + mode->vtotal/2 ;
+		vs2_line_e   = vs2_line_s +
+			       (mode->vsync_end - mode->vsync_start)/2;
+	}
 
 	div = 148500 / mode->clock;
 
-	DBG("clock=%d, div=%u", mode->clock, div);
-	DBG("hs_start=%u, hs_end=%u, line_start=%u, line_end=%u",
-			hs_start, hs_end, line_start, line_end);
-	DBG("vwin_start=%u, vwin_end=%u, de_start=%u, de_end=%u",
-			vwin_start, vwin_end, de_start, de_end);
-	DBG("ref_line=%u, ref_pix=%u, pix_start2=%u",
-			ref_line, ref_pix, pix_start2);
-
 	/* mute the audio FIFO: */
 	reg_set(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO);
 
@@ -561,9 +849,6 @@
 	reg_write(encoder, REG_PLL_SERIAL_2, PLL_SERIAL_2_SRL_NOSC(div) |
 			PLL_SERIAL_2_SRL_PR(rep));
 
-	reg_write16(encoder, REG_VS_PIX_STRT_2_MSB, pix_start2);
-	reg_write16(encoder, REG_VS_PIX_END_2_MSB, pix_start2);
-
 	/* set color matrix bypass flag: */
 	reg_set(encoder, REG_MAT_CONTRL, MAT_CONTRL_MAT_BP);
 
@@ -572,47 +857,75 @@
 
 	reg_clear(encoder, REG_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_MTHD);
 
+	/*
+	 * Sync on rising HSYNC/VSYNC
+	 */
 	reg_write(encoder, REG_VIP_CNTRL_3, 0);
 	reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_SYNC_HS);
+
+	/*
+	 * TDA19988 requires high-active sync at input stage,
+	 * so invert low-active sync provided by master encoder here
+	 */
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_H_TGL);
 	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
 		reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_V_TGL);
 
+	/*
+	 * Always generate sync polarity relative to input sync and
+	 * revert input stage toggled sync at output stage
+	 */
+	reg = TBG_CNTRL_1_TGL_EN;
 	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		reg_set(encoder, REG_VIP_CNTRL_3, VIP_CNTRL_3_H_TGL);
+		reg |= TBG_CNTRL_1_H_TGL;
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		reg |= TBG_CNTRL_1_V_TGL;
+	reg_write(encoder, REG_TBG_CNTRL_1, reg);
 
 	reg_write(encoder, REG_VIDFORMAT, 0x00);
-	reg_write16(encoder, REG_NPIX_MSB, mode->hdisplay - 1);
-	reg_write16(encoder, REG_NLINE_MSB, mode->vdisplay - 1);
-	reg_write16(encoder, REG_VS_LINE_STRT_1_MSB, line_start);
-	reg_write16(encoder, REG_VS_LINE_END_1_MSB, line_end);
-	reg_write16(encoder, REG_VS_PIX_STRT_1_MSB, hs_start);
-	reg_write16(encoder, REG_VS_PIX_END_1_MSB, hs_start);
-	reg_write16(encoder, REG_HS_PIX_START_MSB, hs_start);
-	reg_write16(encoder, REG_HS_PIX_STOP_MSB, hs_end);
-	reg_write16(encoder, REG_VWIN_START_1_MSB, vwin_start);
-	reg_write16(encoder, REG_VWIN_END_1_MSB, vwin_end);
-	reg_write16(encoder, REG_DE_START_MSB, de_start);
-	reg_write16(encoder, REG_DE_STOP_MSB, de_end);
+	reg_write16(encoder, REG_REFPIX_MSB, ref_pix);
+	reg_write16(encoder, REG_REFLINE_MSB, ref_line);
+	reg_write16(encoder, REG_NPIX_MSB, n_pix);
+	reg_write16(encoder, REG_NLINE_MSB, n_line);
+	reg_write16(encoder, REG_VS_LINE_STRT_1_MSB, vs1_line_s);
+	reg_write16(encoder, REG_VS_PIX_STRT_1_MSB, vs1_pix_s);
+	reg_write16(encoder, REG_VS_LINE_END_1_MSB, vs1_line_e);
+	reg_write16(encoder, REG_VS_PIX_END_1_MSB, vs1_pix_e);
+	reg_write16(encoder, REG_VS_LINE_STRT_2_MSB, vs2_line_s);
+	reg_write16(encoder, REG_VS_PIX_STRT_2_MSB, vs2_pix_s);
+	reg_write16(encoder, REG_VS_LINE_END_2_MSB, vs2_line_e);
+	reg_write16(encoder, REG_VS_PIX_END_2_MSB, vs2_pix_e);
+	reg_write16(encoder, REG_HS_PIX_START_MSB, hs_pix_s);
+	reg_write16(encoder, REG_HS_PIX_STOP_MSB, hs_pix_e);
+	reg_write16(encoder, REG_VWIN_START_1_MSB, vwin1_line_s);
+	reg_write16(encoder, REG_VWIN_END_1_MSB, vwin1_line_e);
+	reg_write16(encoder, REG_VWIN_START_2_MSB, vwin2_line_s);
+	reg_write16(encoder, REG_VWIN_END_2_MSB, vwin2_line_e);
+	reg_write16(encoder, REG_DE_START_MSB, de_pix_s);
+	reg_write16(encoder, REG_DE_STOP_MSB, de_pix_e);
 
 	if (priv->rev == TDA19988) {
 		/* let incoming pixels fill the active space (if any) */
 		reg_write(encoder, REG_ENABLE_SPACE, 0x01);
 	}
 
-	reg_write16(encoder, REG_REFPIX_MSB, ref_pix);
-	reg_write16(encoder, REG_REFLINE_MSB, ref_line);
-
-	reg = TBG_CNTRL_1_VHX_EXT_DE |
-			TBG_CNTRL_1_VHX_EXT_HS |
-			TBG_CNTRL_1_VHX_EXT_VS |
-			TBG_CNTRL_1_DWIN_DIS | /* HDCP off */
-			TBG_CNTRL_1_VH_TGL_2;
-	if (mode->flags & (DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC))
-		reg |= TBG_CNTRL_1_VH_TGL_0;
-	reg_set(encoder, REG_TBG_CNTRL_1, reg);
-
 	/* must be last register set: */
 	reg_clear(encoder, REG_TBG_CNTRL_0, TBG_CNTRL_0_SYNC_ONCE);
+
+	/* Only setup the info frames if the sink is HDMI */
+	if (priv->is_hdmi_sink) {
+		/* We need to turn HDMI HDCP stuff on to get audio through */
+		reg_clear(encoder, REG_TBG_CNTRL_1, TBG_CNTRL_1_DWIN_DIS);
+		reg_write(encoder, REG_ENC_CNTRL, ENC_CNTRL_CTL_CODE(1));
+		reg_set(encoder, REG_TX33, TX33_HDMI);
+
+		tda998x_write_avi(encoder, adjusted_mode);
+
+		if (priv->params.audio_cfg)
+			tda998x_configure_audio(encoder, adjusted_mode,
+						&priv->params);
+	}
 }
 
 static enum drm_connector_status
@@ -673,6 +986,7 @@
 static uint8_t *
 do_get_edid(struct drm_encoder *encoder)
 {
+	struct tda998x_priv *priv = to_tda998x_priv(encoder);
 	int j = 0, valid_extensions = 0;
 	uint8_t *block, *new;
 	bool print_bad_edid = drm_debug & DRM_UT_KMS;
@@ -680,6 +994,9 @@
 	if ((block = kmalloc(EDID_LENGTH, GFP_KERNEL)) == NULL)
 		return NULL;
 
+	if (priv->rev == TDA19988)
+		reg_clear(encoder, REG_TX4, TX4_PD_RAM);
+
 	/* base block fetch */
 	if (read_edid_block(encoder, block, 0))
 		goto fail;
@@ -689,7 +1006,7 @@
 
 	/* if there's no extensions, we're done */
 	if (block[0x7e] == 0)
-		return block;
+		goto done;
 
 	new = krealloc(block, (block[0x7e] + 1) * EDID_LENGTH, GFP_KERNEL);
 	if (!new)
@@ -716,9 +1033,15 @@
 		block = new;
 	}
 
+done:
+	if (priv->rev == TDA19988)
+		reg_set(encoder, REG_TX4, TX4_PD_RAM);
+
 	return block;
 
 fail:
+	if (priv->rev == TDA19988)
+		reg_set(encoder, REG_TX4, TX4_PD_RAM);
 	dev_warn(encoder->dev->dev, "failed to read EDID\n");
 	kfree(block);
 	return NULL;
@@ -728,12 +1051,14 @@
 tda998x_encoder_get_modes(struct drm_encoder *encoder,
 			 struct drm_connector *connector)
 {
+	struct tda998x_priv *priv = to_tda998x_priv(encoder);
 	struct edid *edid = (struct edid *)do_get_edid(encoder);
 	int n = 0;
 
 	if (edid) {
 		drm_mode_connector_update_edid_property(connector, edid);
 		n = drm_add_edid_modes(connector, edid);
+		priv->is_hdmi_sink = drm_detect_hdmi_monitor(edid);
 		kfree(edid);
 	}
 
@@ -807,6 +1132,10 @@
 	if (!priv)
 		return -ENOMEM;
 
+	priv->vip_cntrl_0 = VIP_CNTRL_0_SWAP_A(2) | VIP_CNTRL_0_SWAP_B(3);
+	priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1);
+	priv->vip_cntrl_2 = VIP_CNTRL_2_SWAP_E(4) | VIP_CNTRL_2_SWAP_F(5);
+
 	priv->current_page = 0;
 	priv->cec = i2c_new_dummy(client->adapter, 0x34);
 	priv->dpms = DRM_MODE_DPMS_OFF;
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index ada49ed..ab1892eb 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -113,7 +113,6 @@
 	.release = drm_release,
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = i810_mmap_buffers,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -1241,7 +1240,7 @@
 	return 0;
 }
 
-struct drm_ioctl_desc i810_ioctls[] = {
+const struct drm_ioctl_desc i810_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I810_INIT, i810_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I810_VERTEX, i810_dma_vertex, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I810_CLEAR, i810_clear_bufs, DRM_AUTH|DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c
index 2e91fc3..d8180d2 100644
--- a/drivers/gpu/drm/i810/i810_drv.c
+++ b/drivers/gpu/drm/i810/i810_drv.c
@@ -49,7 +49,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -58,7 +57,7 @@
 
 static struct drm_driver driver = {
 	.driver_features =
-	    DRIVER_USE_AGP | DRIVER_REQUIRE_AGP | DRIVER_USE_MTRR |
+	    DRIVER_USE_AGP | DRIVER_REQUIRE_AGP |
 	    DRIVER_HAVE_DMA,
 	.dev_priv_size = sizeof(drm_i810_buf_priv_t),
 	.load = i810_driver_load,
diff --git a/drivers/gpu/drm/i810/i810_drv.h b/drivers/gpu/drm/i810/i810_drv.h
index 6e0acad..d4d16ed 100644
--- a/drivers/gpu/drm/i810/i810_drv.h
+++ b/drivers/gpu/drm/i810/i810_drv.h
@@ -125,7 +125,7 @@
 extern int i810_driver_device_is_agp(struct drm_device *dev);
 
 extern long i810_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-extern struct drm_ioctl_desc i810_ioctls[];
+extern const struct drm_ioctl_desc i810_ioctls[];
 extern int i810_max_ioctl;
 
 #define I810_BASE(reg)		((unsigned long) \
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 40034ec..b8449a8 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -5,6 +5,7 @@
 ccflags-y := -Iinclude/drm
 i915-y := i915_drv.o i915_dma.o i915_irq.o \
 	  i915_debugfs.o \
+	  i915_gpu_error.o \
           i915_suspend.o \
 	  i915_gem.o \
 	  i915_gem_context.o \
@@ -37,6 +38,7 @@
 	  intel_sprite.o \
 	  intel_opregion.o \
 	  intel_sideband.o \
+	  intel_uncore.o \
 	  dvo_ch7xxx.o \
 	  dvo_ch7017.o \
 	  dvo_ivch.o \
diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/dvo_ch7xxx.c
index 757e0fa..af42e94 100644
--- a/drivers/gpu/drm/i915/dvo_ch7xxx.c
+++ b/drivers/gpu/drm/i915/dvo_ch7xxx.c
@@ -307,7 +307,7 @@
 		idf |= CH7xxx_IDF_HSP;
 
 	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
-		idf |= CH7xxx_IDF_HSP;
+		idf |= CH7xxx_IDF_VSP;
 
 	ch7xxx_writeb(dvo, CH7xxx_IDF, idf);
 }
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 47d6c74..55ab924 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -30,7 +30,8 @@
 #include <linux/debugfs.h>
 #include <linux/slab.h>
 #include <linux/export.h>
-#include <generated/utsrelease.h>
+#include <linux/list_sort.h>
+#include <asm/msr-index.h>
 #include <drm/drmP.h>
 #include "intel_drv.h"
 #include "intel_ringbuffer.h"
@@ -90,41 +91,45 @@
 	}
 }
 
-static const char *cache_level_str(int type)
+static inline const char *get_global_flag(struct drm_i915_gem_object *obj)
 {
-	switch (type) {
-	case I915_CACHE_NONE: return " uncached";
-	case I915_CACHE_LLC: return " snooped (LLC)";
-	case I915_CACHE_LLC_MLC: return " snooped (LLC+MLC)";
-	default: return "";
-	}
+	return obj->has_global_gtt_mapping ? "g" : " ";
 }
 
 static void
 describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 {
-	seq_printf(m, "%pK: %s%s %8zdKiB %02x %02x %d %d %d%s%s%s",
+	struct i915_vma *vma;
+	seq_printf(m, "%pK: %s%s%s %8zdKiB %02x %02x %u %u %u%s%s%s",
 		   &obj->base,
 		   get_pin_flag(obj),
 		   get_tiling_flag(obj),
+		   get_global_flag(obj),
 		   obj->base.size / 1024,
 		   obj->base.read_domains,
 		   obj->base.write_domain,
 		   obj->last_read_seqno,
 		   obj->last_write_seqno,
 		   obj->last_fenced_seqno,
-		   cache_level_str(obj->cache_level),
+		   i915_cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
 		seq_printf(m, " (name: %d)", obj->base.name);
 	if (obj->pin_count)
 		seq_printf(m, " (pinned x %d)", obj->pin_count);
+	if (obj->pin_display)
+		seq_printf(m, " (display)");
 	if (obj->fence_reg != I915_FENCE_REG_NONE)
 		seq_printf(m, " (fence: %d)", obj->fence_reg);
-	if (obj->gtt_space != NULL)
-		seq_printf(m, " (gtt offset: %08x, size: %08x)",
-			   obj->gtt_offset, (unsigned int)obj->gtt_space->size);
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		if (!i915_is_ggtt(vma->vm))
+			seq_puts(m, " (pp");
+		else
+			seq_puts(m, " (g");
+		seq_printf(m, "gtt offset: %08lx, size: %08lx)",
+			   vma->node.start, vma->node.size);
+	}
 	if (obj->stolen)
 		seq_printf(m, " (stolen: %08lx)", obj->stolen->start);
 	if (obj->pin_mappable || obj->fault_mappable) {
@@ -146,8 +151,9 @@
 	uintptr_t list = (uintptr_t) node->info_ent->data;
 	struct list_head *head;
 	struct drm_device *dev = node->minor->dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
+	struct i915_vma *vma;
 	size_t total_obj_size, total_gtt_size;
 	int count, ret;
 
@@ -155,14 +161,15 @@
 	if (ret)
 		return ret;
 
+	/* FIXME: the user of this interface might want more than just GGTT */
 	switch (list) {
 	case ACTIVE_LIST:
-		seq_printf(m, "Active:\n");
-		head = &dev_priv->mm.active_list;
+		seq_puts(m, "Active:\n");
+		head = &vm->active_list;
 		break;
 	case INACTIVE_LIST:
-		seq_printf(m, "Inactive:\n");
-		head = &dev_priv->mm.inactive_list;
+		seq_puts(m, "Inactive:\n");
+		head = &vm->inactive_list;
 		break;
 	default:
 		mutex_unlock(&dev->struct_mutex);
@@ -170,12 +177,12 @@
 	}
 
 	total_obj_size = total_gtt_size = count = 0;
-	list_for_each_entry(obj, head, mm_list) {
+	list_for_each_entry(vma, head, mm_list) {
 		seq_printf(m, "   ");
-		describe_obj(m, obj);
+		describe_obj(m, vma->obj);
 		seq_printf(m, "\n");
-		total_obj_size += obj->base.size;
-		total_gtt_size += obj->gtt_space->size;
+		total_obj_size += vma->obj->base.size;
+		total_gtt_size += vma->node.size;
 		count++;
 	}
 	mutex_unlock(&dev->struct_mutex);
@@ -185,12 +192,73 @@
 	return 0;
 }
 
+static int obj_rank_by_stolen(void *priv,
+			      struct list_head *A, struct list_head *B)
+{
+	struct drm_i915_gem_object *a =
+		container_of(A, struct drm_i915_gem_object, obj_exec_link);
+	struct drm_i915_gem_object *b =
+		container_of(B, struct drm_i915_gem_object, obj_exec_link);
+
+	return a->stolen->start - b->stolen->start;
+}
+
+static int i915_gem_stolen_list_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+	size_t total_obj_size, total_gtt_size;
+	LIST_HEAD(stolen);
+	int count, ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	total_obj_size = total_gtt_size = count = 0;
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		if (obj->stolen == NULL)
+			continue;
+
+		list_add(&obj->obj_exec_link, &stolen);
+
+		total_obj_size += obj->base.size;
+		total_gtt_size += i915_gem_obj_ggtt_size(obj);
+		count++;
+	}
+	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) {
+		if (obj->stolen == NULL)
+			continue;
+
+		list_add(&obj->obj_exec_link, &stolen);
+
+		total_obj_size += obj->base.size;
+		count++;
+	}
+	list_sort(NULL, &stolen, obj_rank_by_stolen);
+	seq_puts(m, "Stolen:\n");
+	while (!list_empty(&stolen)) {
+		obj = list_first_entry(&stolen, typeof(*obj), obj_exec_link);
+		seq_puts(m, "   ");
+		describe_obj(m, obj);
+		seq_putc(m, '\n');
+		list_del_init(&obj->obj_exec_link);
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	seq_printf(m, "Total %d objects, %zu bytes, %zu GTT size\n",
+		   count, total_obj_size, total_gtt_size);
+	return 0;
+}
+
 #define count_objects(list, member) do { \
 	list_for_each_entry(obj, list, member) { \
-		size += obj->gtt_space->size; \
+		size += i915_gem_obj_ggtt_size(obj); \
 		++count; \
 		if (obj->map_and_fenceable) { \
-			mappable_size += obj->gtt_space->size; \
+			mappable_size += i915_gem_obj_ggtt_size(obj); \
 			++mappable_count; \
 		} \
 	} \
@@ -209,7 +277,7 @@
 	stats->count++;
 	stats->total += obj->base.size;
 
-	if (obj->gtt_space) {
+	if (i915_gem_obj_ggtt_bound(obj)) {
 		if (!list_empty(&obj->ring_list))
 			stats->active += obj->base.size;
 		else
@@ -222,6 +290,17 @@
 	return 0;
 }
 
+#define count_vmas(list, member) do { \
+	list_for_each_entry(vma, list, member) { \
+		size += i915_gem_obj_ggtt_size(vma->obj); \
+		++count; \
+		if (vma->obj->map_and_fenceable) { \
+			mappable_size += i915_gem_obj_ggtt_size(vma->obj); \
+			++mappable_count; \
+		} \
+	} \
+} while (0)
+
 static int i915_gem_object_info(struct seq_file *m, void* data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
@@ -230,7 +309,9 @@
 	u32 count, mappable_count, purgeable_count;
 	size_t size, mappable_size, purgeable_size;
 	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
 	struct drm_file *file;
+	struct i915_vma *vma;
 	int ret;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -247,12 +328,12 @@
 		   count, mappable_count, size, mappable_size);
 
 	size = count = mappable_size = mappable_count = 0;
-	count_objects(&dev_priv->mm.active_list, mm_list);
+	count_vmas(&vm->active_list, mm_list);
 	seq_printf(m, "  %u [%u] active objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
 	size = count = mappable_size = mappable_count = 0;
-	count_objects(&dev_priv->mm.inactive_list, mm_list);
+	count_vmas(&vm->inactive_list, mm_list);
 	seq_printf(m, "  %u [%u] inactive objects, %zu [%zu] bytes\n",
 		   count, mappable_count, size, mappable_size);
 
@@ -267,11 +348,11 @@
 	size = count = mappable_size = mappable_count = 0;
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		if (obj->fault_mappable) {
-			size += obj->gtt_space->size;
+			size += i915_gem_obj_ggtt_size(obj);
 			++count;
 		}
 		if (obj->pin_mappable) {
-			mappable_size += obj->gtt_space->size;
+			mappable_size += i915_gem_obj_ggtt_size(obj);
 			++mappable_count;
 		}
 		if (obj->madv == I915_MADV_DONTNEED) {
@@ -287,10 +368,10 @@
 		   count, size);
 
 	seq_printf(m, "%zu [%lu] gtt total\n",
-		   dev_priv->gtt.total,
-		   dev_priv->gtt.mappable_end - dev_priv->gtt.start);
+		   dev_priv->gtt.base.total,
+		   dev_priv->gtt.mappable_end - dev_priv->gtt.base.start);
 
-	seq_printf(m, "\n");
+	seq_putc(m, '\n');
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
 		struct file_stats stats;
 
@@ -310,7 +391,7 @@
 	return 0;
 }
 
-static int i915_gem_gtt_info(struct seq_file *m, void* data)
+static int i915_gem_gtt_info(struct seq_file *m, void *data)
 {
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
@@ -329,11 +410,11 @@
 		if (list == PINNED_LIST && obj->pin_count == 0)
 			continue;
 
-		seq_printf(m, "   ");
+		seq_puts(m, "   ");
 		describe_obj(m, obj);
-		seq_printf(m, "\n");
+		seq_putc(m, '\n');
 		total_obj_size += obj->base.size;
-		total_gtt_size += obj->gtt_space->size;
+		total_gtt_size += i915_gem_obj_ggtt_size(obj);
 		count++;
 	}
 
@@ -371,20 +452,22 @@
 					   pipe, plane);
 			}
 			if (work->enable_stall_check)
-				seq_printf(m, "Stall check enabled, ");
+				seq_puts(m, "Stall check enabled, ");
 			else
-				seq_printf(m, "Stall check waiting for page flip ioctl, ");
+				seq_puts(m, "Stall check waiting for page flip ioctl, ");
 			seq_printf(m, "%d prepares\n", atomic_read(&work->pending));
 
 			if (work->old_fb_obj) {
 				struct drm_i915_gem_object *obj = work->old_fb_obj;
 				if (obj)
-					seq_printf(m, "Old framebuffer gtt_offset 0x%08x\n", obj->gtt_offset);
+					seq_printf(m, "Old framebuffer gtt_offset 0x%08lx\n",
+						   i915_gem_obj_ggtt_offset(obj));
 			}
 			if (work->pending_flip_obj) {
 				struct drm_i915_gem_object *obj = work->pending_flip_obj;
 				if (obj)
-					seq_printf(m, "New framebuffer gtt_offset 0x%08x\n", obj->gtt_offset);
+					seq_printf(m, "New framebuffer gtt_offset 0x%08lx\n",
+						   i915_gem_obj_ggtt_offset(obj));
 			}
 		}
 		spin_unlock_irqrestore(&dev->event_lock, flags);
@@ -424,7 +507,7 @@
 	mutex_unlock(&dev->struct_mutex);
 
 	if (count == 0)
-		seq_printf(m, "No requests\n");
+		seq_puts(m, "No requests\n");
 
 	return 0;
 }
@@ -574,10 +657,10 @@
 		seq_printf(m, "Fence %d, pin count = %d, object = ",
 			   i, dev_priv->fence_regs[i].pin_count);
 		if (obj == NULL)
-			seq_printf(m, "unused");
+			seq_puts(m, "unused");
 		else
 			describe_obj(m, obj);
-		seq_printf(m, "\n");
+		seq_putc(m, '\n');
 	}
 
 	mutex_unlock(&dev->struct_mutex);
@@ -606,361 +689,6 @@
 	return 0;
 }
 
-static const char *ring_str(int ring)
-{
-	switch (ring) {
-	case RCS: return "render";
-	case VCS: return "bsd";
-	case BCS: return "blt";
-	case VECS: return "vebox";
-	default: return "";
-	}
-}
-
-static const char *pin_flag(int pinned)
-{
-	if (pinned > 0)
-		return " P";
-	else if (pinned < 0)
-		return " p";
-	else
-		return "";
-}
-
-static const char *tiling_flag(int tiling)
-{
-	switch (tiling) {
-	default:
-	case I915_TILING_NONE: return "";
-	case I915_TILING_X: return " X";
-	case I915_TILING_Y: return " Y";
-	}
-}
-
-static const char *dirty_flag(int dirty)
-{
-	return dirty ? " dirty" : "";
-}
-
-static const char *purgeable_flag(int purgeable)
-{
-	return purgeable ? " purgeable" : "";
-}
-
-static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
-{
-
-	if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
-		e->err = -ENOSPC;
-		return false;
-	}
-
-	if (e->bytes == e->size - 1 || e->err)
-		return false;
-
-	return true;
-}
-
-static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
-			      unsigned len)
-{
-	if (e->pos + len <= e->start) {
-		e->pos += len;
-		return false;
-	}
-
-	/* First vsnprintf needs to fit in its entirety for memmove */
-	if (len >= e->size) {
-		e->err = -EIO;
-		return false;
-	}
-
-	return true;
-}
-
-static void __i915_error_advance(struct drm_i915_error_state_buf *e,
-				 unsigned len)
-{
-	/* If this is first printf in this window, adjust it so that
-	 * start position matches start of the buffer
-	 */
-
-	if (e->pos < e->start) {
-		const size_t off = e->start - e->pos;
-
-		/* Should not happen but be paranoid */
-		if (off > len || e->bytes) {
-			e->err = -EIO;
-			return;
-		}
-
-		memmove(e->buf, e->buf + off, len - off);
-		e->bytes = len - off;
-		e->pos = e->start;
-		return;
-	}
-
-	e->bytes += len;
-	e->pos += len;
-}
-
-static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
-			       const char *f, va_list args)
-{
-	unsigned len;
-
-	if (!__i915_error_ok(e))
-		return;
-
-	/* Seek the first printf which is hits start position */
-	if (e->pos < e->start) {
-		len = vsnprintf(NULL, 0, f, args);
-		if (!__i915_error_seek(e, len))
-			return;
-	}
-
-	len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
-	if (len >= e->size - e->bytes)
-		len = e->size - e->bytes - 1;
-
-	__i915_error_advance(e, len);
-}
-
-static void i915_error_puts(struct drm_i915_error_state_buf *e,
-			    const char *str)
-{
-	unsigned len;
-
-	if (!__i915_error_ok(e))
-		return;
-
-	len = strlen(str);
-
-	/* Seek the first printf which is hits start position */
-	if (e->pos < e->start) {
-		if (!__i915_error_seek(e, len))
-			return;
-	}
-
-	if (len >= e->size - e->bytes)
-		len = e->size - e->bytes - 1;
-	memcpy(e->buf + e->bytes, str, len);
-
-	__i915_error_advance(e, len);
-}
-
-void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
-{
-	va_list args;
-
-	va_start(args, f);
-	i915_error_vprintf(e, f, args);
-	va_end(args);
-}
-
-#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
-#define err_puts(e, s) i915_error_puts(e, s)
-
-static void print_error_buffers(struct drm_i915_error_state_buf *m,
-				const char *name,
-				struct drm_i915_error_buffer *err,
-				int count)
-{
-	err_printf(m, "%s [%d]:\n", name, count);
-
-	while (count--) {
-		err_printf(m, "  %08x %8u %02x %02x %x %x",
-			   err->gtt_offset,
-			   err->size,
-			   err->read_domains,
-			   err->write_domain,
-			   err->rseqno, err->wseqno);
-		err_puts(m, pin_flag(err->pinned));
-		err_puts(m, tiling_flag(err->tiling));
-		err_puts(m, dirty_flag(err->dirty));
-		err_puts(m, purgeable_flag(err->purgeable));
-		err_puts(m, err->ring != -1 ? " " : "");
-		err_puts(m, ring_str(err->ring));
-		err_puts(m, cache_level_str(err->cache_level));
-
-		if (err->name)
-			err_printf(m, " (name: %d)", err->name);
-		if (err->fence_reg != I915_FENCE_REG_NONE)
-			err_printf(m, " (fence: %d)", err->fence_reg);
-
-		err_puts(m, "\n");
-		err++;
-	}
-}
-
-static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
-				  struct drm_device *dev,
-				  struct drm_i915_error_state *error,
-				  unsigned ring)
-{
-	BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */
-	err_printf(m, "%s command stream:\n", ring_str(ring));
-	err_printf(m, "  HEAD: 0x%08x\n", error->head[ring]);
-	err_printf(m, "  TAIL: 0x%08x\n", error->tail[ring]);
-	err_printf(m, "  CTL: 0x%08x\n", error->ctl[ring]);
-	err_printf(m, "  ACTHD: 0x%08x\n", error->acthd[ring]);
-	err_printf(m, "  IPEIR: 0x%08x\n", error->ipeir[ring]);
-	err_printf(m, "  IPEHR: 0x%08x\n", error->ipehr[ring]);
-	err_printf(m, "  INSTDONE: 0x%08x\n", error->instdone[ring]);
-	if (ring == RCS && INTEL_INFO(dev)->gen >= 4)
-		err_printf(m, "  BBADDR: 0x%08llx\n", error->bbaddr);
-
-	if (INTEL_INFO(dev)->gen >= 4)
-		err_printf(m, "  INSTPS: 0x%08x\n", error->instps[ring]);
-	err_printf(m, "  INSTPM: 0x%08x\n", error->instpm[ring]);
-	err_printf(m, "  FADDR: 0x%08x\n", error->faddr[ring]);
-	if (INTEL_INFO(dev)->gen >= 6) {
-		err_printf(m, "  RC PSMI: 0x%08x\n", error->rc_psmi[ring]);
-		err_printf(m, "  FAULT_REG: 0x%08x\n", error->fault_reg[ring]);
-		err_printf(m, "  SYNC_0: 0x%08x [last synced 0x%08x]\n",
-			   error->semaphore_mboxes[ring][0],
-			   error->semaphore_seqno[ring][0]);
-		err_printf(m, "  SYNC_1: 0x%08x [last synced 0x%08x]\n",
-			   error->semaphore_mboxes[ring][1],
-			   error->semaphore_seqno[ring][1]);
-	}
-	err_printf(m, "  seqno: 0x%08x\n", error->seqno[ring]);
-	err_printf(m, "  waiting: %s\n", yesno(error->waiting[ring]));
-	err_printf(m, "  ring->head: 0x%08x\n", error->cpu_ring_head[ring]);
-	err_printf(m, "  ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]);
-}
-
-struct i915_error_state_file_priv {
-	struct drm_device *dev;
-	struct drm_i915_error_state *error;
-};
-
-
-static int i915_error_state(struct i915_error_state_file_priv *error_priv,
-			    struct drm_i915_error_state_buf *m)
-
-{
-	struct drm_device *dev = error_priv->dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_error_state *error = error_priv->error;
-	struct intel_ring_buffer *ring;
-	int i, j, page, offset, elt;
-
-	if (!error) {
-		err_printf(m, "no error state collected\n");
-		return 0;
-	}
-
-	err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
-		   error->time.tv_usec);
-	err_printf(m, "Kernel: " UTS_RELEASE "\n");
-	err_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
-	err_printf(m, "EIR: 0x%08x\n", error->eir);
-	err_printf(m, "IER: 0x%08x\n", error->ier);
-	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
-	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
-	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
-	err_printf(m, "CCID: 0x%08x\n", error->ccid);
-
-	for (i = 0; i < dev_priv->num_fence_regs; i++)
-		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
-
-	for (i = 0; i < ARRAY_SIZE(error->extra_instdone); i++)
-		err_printf(m, "  INSTDONE_%d: 0x%08x\n", i,
-			   error->extra_instdone[i]);
-
-	if (INTEL_INFO(dev)->gen >= 6) {
-		err_printf(m, "ERROR: 0x%08x\n", error->error);
-		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
-	}
-
-	if (INTEL_INFO(dev)->gen == 7)
-		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
-
-	for_each_ring(ring, dev_priv, i)
-		i915_ring_error_state(m, dev, error, i);
-
-	if (error->active_bo)
-		print_error_buffers(m, "Active",
-				    error->active_bo,
-				    error->active_bo_count);
-
-	if (error->pinned_bo)
-		print_error_buffers(m, "Pinned",
-				    error->pinned_bo,
-				    error->pinned_bo_count);
-
-	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
-		struct drm_i915_error_object *obj;
-
-		if ((obj = error->ring[i].batchbuffer)) {
-			err_printf(m, "%s --- gtt_offset = 0x%08x\n",
-				   dev_priv->ring[i].name,
-				   obj->gtt_offset);
-			offset = 0;
-			for (page = 0; page < obj->page_count; page++) {
-				for (elt = 0; elt < PAGE_SIZE/4; elt++) {
-					err_printf(m, "%08x :  %08x\n", offset,
-						   obj->pages[page][elt]);
-					offset += 4;
-				}
-			}
-		}
-
-		if (error->ring[i].num_requests) {
-			err_printf(m, "%s --- %d requests\n",
-				   dev_priv->ring[i].name,
-				   error->ring[i].num_requests);
-			for (j = 0; j < error->ring[i].num_requests; j++) {
-				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
-					   error->ring[i].requests[j].seqno,
-					   error->ring[i].requests[j].jiffies,
-					   error->ring[i].requests[j].tail);
-			}
-		}
-
-		if ((obj = error->ring[i].ringbuffer)) {
-			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
-				   dev_priv->ring[i].name,
-				   obj->gtt_offset);
-			offset = 0;
-			for (page = 0; page < obj->page_count; page++) {
-				for (elt = 0; elt < PAGE_SIZE/4; elt++) {
-					err_printf(m, "%08x :  %08x\n",
-						   offset,
-						   obj->pages[page][elt]);
-					offset += 4;
-				}
-			}
-		}
-
-		obj = error->ring[i].ctx;
-		if (obj) {
-			err_printf(m, "%s --- HW Context = 0x%08x\n",
-				   dev_priv->ring[i].name,
-				   obj->gtt_offset);
-			offset = 0;
-			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
-				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
-					   offset,
-					   obj->pages[0][elt],
-					   obj->pages[0][elt+1],
-					   obj->pages[0][elt+2],
-					   obj->pages[0][elt+3]);
-					offset += 16;
-			}
-		}
-	}
-
-	if (error->overlay)
-		intel_overlay_print_error_state(m, error->overlay);
-
-	if (error->display)
-		intel_display_print_error_state(m, dev, error->display);
-
-	return 0;
-}
-
 static ssize_t
 i915_error_state_write(struct file *filp,
 		       const char __user *ubuf,
@@ -986,9 +714,7 @@
 static int i915_error_state_open(struct inode *inode, struct file *file)
 {
 	struct drm_device *dev = inode->i_private;
-	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct i915_error_state_file_priv *error_priv;
-	unsigned long flags;
 
 	error_priv = kzalloc(sizeof(*error_priv), GFP_KERNEL);
 	if (!error_priv)
@@ -996,11 +722,7 @@
 
 	error_priv->dev = dev;
 
-	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
-	error_priv->error = dev_priv->gpu_error.first_error;
-	if (error_priv->error)
-		kref_get(&error_priv->error->ref);
-	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
+	i915_error_state_get(dev, error_priv);
 
 	file->private_data = error_priv;
 
@@ -1011,8 +733,7 @@
 {
 	struct i915_error_state_file_priv *error_priv = file->private_data;
 
-	if (error_priv->error)
-		kref_put(&error_priv->error->ref, i915_error_state_free);
+	i915_error_state_put(error_priv);
 	kfree(error_priv);
 
 	return 0;
@@ -1025,41 +746,16 @@
 	struct drm_i915_error_state_buf error_str;
 	loff_t tmp_pos = 0;
 	ssize_t ret_count = 0;
-	int ret = 0;
+	int ret;
 
-	memset(&error_str, 0, sizeof(error_str));
+	ret = i915_error_state_buf_init(&error_str, count, *pos);
+	if (ret)
+		return ret;
 
-	/* We need to have enough room to store any i915_error_state printf
-	 * so that we can move it to start position.
-	 */
-	error_str.size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
-	error_str.buf = kmalloc(error_str.size,
-				GFP_TEMPORARY | __GFP_NORETRY | __GFP_NOWARN);
-
-	if (error_str.buf == NULL) {
-		error_str.size = PAGE_SIZE;
-		error_str.buf = kmalloc(error_str.size, GFP_TEMPORARY);
-	}
-
-	if (error_str.buf == NULL) {
-		error_str.size = 128;
-		error_str.buf = kmalloc(error_str.size, GFP_TEMPORARY);
-	}
-
-	if (error_str.buf == NULL)
-		return -ENOMEM;
-
-	error_str.start = *pos;
-
-	ret = i915_error_state(error_priv, &error_str);
+	ret = i915_error_state_to_str(&error_str, error_priv);
 	if (ret)
 		goto out;
 
-	if (error_str.bytes == 0 && error_str.err) {
-		ret = error_str.err;
-		goto out;
-	}
-
 	ret_count = simple_read_from_buffer(userbuf, count, &tmp_pos,
 					    error_str.buf,
 					    error_str.bytes);
@@ -1069,7 +765,7 @@
 	else
 		*pos = error_str.start + ret_count;
 out:
-	kfree(error_str.buf);
+	i915_error_state_buf_release(&error_str);
 	return ret ?: ret_count;
 }
 
@@ -1246,7 +942,7 @@
 					(freq_sts >> 8) & 0xff));
 		mutex_unlock(&dev_priv->rps.hw_lock);
 	} else {
-		seq_printf(m, "no P-state info available\n");
+		seq_puts(m, "no P-state info available\n");
 	}
 
 	return 0;
@@ -1341,28 +1037,28 @@
 	seq_printf(m, "RS2 VID: %d\n", ((crstandvid >> 8) & 0x3f));
 	seq_printf(m, "Render standby enabled: %s\n",
 		   (rstdbyctl & RCX_SW_EXIT) ? "no" : "yes");
-	seq_printf(m, "Current RS state: ");
+	seq_puts(m, "Current RS state: ");
 	switch (rstdbyctl & RSX_STATUS_MASK) {
 	case RSX_STATUS_ON:
-		seq_printf(m, "on\n");
+		seq_puts(m, "on\n");
 		break;
 	case RSX_STATUS_RC1:
-		seq_printf(m, "RC1\n");
+		seq_puts(m, "RC1\n");
 		break;
 	case RSX_STATUS_RC1E:
-		seq_printf(m, "RC1E\n");
+		seq_puts(m, "RC1E\n");
 		break;
 	case RSX_STATUS_RS1:
-		seq_printf(m, "RS1\n");
+		seq_puts(m, "RS1\n");
 		break;
 	case RSX_STATUS_RS2:
-		seq_printf(m, "RS2 (RC6)\n");
+		seq_puts(m, "RS2 (RC6)\n");
 		break;
 	case RSX_STATUS_RS3:
-		seq_printf(m, "RC3 (RC6+)\n");
+		seq_puts(m, "RC3 (RC6+)\n");
 		break;
 	default:
-		seq_printf(m, "unknown\n");
+		seq_puts(m, "unknown\n");
 		break;
 	}
 
@@ -1377,20 +1073,19 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 rpmodectl1, gt_core_status, rcctl1, rc6vids = 0;
 	unsigned forcewake_count;
-	int count=0, ret;
-
+	int count = 0, ret;
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
 
-	spin_lock_irq(&dev_priv->gt_lock);
-	forcewake_count = dev_priv->forcewake_count;
-	spin_unlock_irq(&dev_priv->gt_lock);
+	spin_lock_irq(&dev_priv->uncore.lock);
+	forcewake_count = dev_priv->uncore.forcewake_count;
+	spin_unlock_irq(&dev_priv->uncore.lock);
 
 	if (forcewake_count) {
-		seq_printf(m, "RC information inaccurate because somebody "
-			      "holds a forcewake reference \n");
+		seq_puts(m, "RC information inaccurate because somebody "
+			    "holds a forcewake reference \n");
 	} else {
 		/* NB: we cannot use forcewake, else we read the wrong values */
 		while (count++ < 50 && (I915_READ_NOTRACE(FORCEWAKE_ACK) & 1))
@@ -1399,7 +1094,7 @@
 	}
 
 	gt_core_status = readl(dev_priv->regs + GEN6_GT_CORE_STATUS);
-	trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4);
+	trace_i915_reg_rw(false, GEN6_GT_CORE_STATUS, gt_core_status, 4, true);
 
 	rpmodectl1 = I915_READ(GEN6_RP_CONTROL);
 	rcctl1 = I915_READ(GEN6_RC_CONTROL);
@@ -1423,25 +1118,25 @@
 		   yesno(rcctl1 & GEN6_RC_CTL_RC6p_ENABLE));
 	seq_printf(m, "Deepest RC6 Enabled: %s\n",
 		   yesno(rcctl1 & GEN6_RC_CTL_RC6pp_ENABLE));
-	seq_printf(m, "Current RC state: ");
+	seq_puts(m, "Current RC state: ");
 	switch (gt_core_status & GEN6_RCn_MASK) {
 	case GEN6_RC0:
 		if (gt_core_status & GEN6_CORE_CPD_STATE_MASK)
-			seq_printf(m, "Core Power Down\n");
+			seq_puts(m, "Core Power Down\n");
 		else
-			seq_printf(m, "on\n");
+			seq_puts(m, "on\n");
 		break;
 	case GEN6_RC3:
-		seq_printf(m, "RC3\n");
+		seq_puts(m, "RC3\n");
 		break;
 	case GEN6_RC6:
-		seq_printf(m, "RC6\n");
+		seq_puts(m, "RC6\n");
 		break;
 	case GEN6_RC7:
-		seq_printf(m, "RC7\n");
+		seq_puts(m, "RC7\n");
 		break;
 	default:
-		seq_printf(m, "Unknown\n");
+		seq_puts(m, "Unknown\n");
 		break;
 	}
 
@@ -1485,43 +1180,52 @@
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	if (!I915_HAS_FBC(dev)) {
-		seq_printf(m, "FBC unsupported on this chipset\n");
+		seq_puts(m, "FBC unsupported on this chipset\n");
 		return 0;
 	}
 
 	if (intel_fbc_enabled(dev)) {
-		seq_printf(m, "FBC enabled\n");
+		seq_puts(m, "FBC enabled\n");
 	} else {
-		seq_printf(m, "FBC disabled: ");
-		switch (dev_priv->no_fbc_reason) {
+		seq_puts(m, "FBC disabled: ");
+		switch (dev_priv->fbc.no_fbc_reason) {
+		case FBC_OK:
+			seq_puts(m, "FBC actived, but currently disabled in hardware");
+			break;
+		case FBC_UNSUPPORTED:
+			seq_puts(m, "unsupported by this chipset");
+			break;
 		case FBC_NO_OUTPUT:
-			seq_printf(m, "no outputs");
+			seq_puts(m, "no outputs");
 			break;
 		case FBC_STOLEN_TOO_SMALL:
-			seq_printf(m, "not enough stolen memory");
+			seq_puts(m, "not enough stolen memory");
 			break;
 		case FBC_UNSUPPORTED_MODE:
-			seq_printf(m, "mode not supported");
+			seq_puts(m, "mode not supported");
 			break;
 		case FBC_MODE_TOO_LARGE:
-			seq_printf(m, "mode too large");
+			seq_puts(m, "mode too large");
 			break;
 		case FBC_BAD_PLANE:
-			seq_printf(m, "FBC unsupported on plane");
+			seq_puts(m, "FBC unsupported on plane");
 			break;
 		case FBC_NOT_TILED:
-			seq_printf(m, "scanout buffer not tiled");
+			seq_puts(m, "scanout buffer not tiled");
 			break;
 		case FBC_MULTIPLE_PIPES:
-			seq_printf(m, "multiple pipes are enabled");
+			seq_puts(m, "multiple pipes are enabled");
 			break;
 		case FBC_MODULE_PARAM:
-			seq_printf(m, "disabled per module param (default off)");
+			seq_puts(m, "disabled per module param (default off)");
+			break;
+		case FBC_CHIP_DEFAULT:
+			seq_puts(m, "disabled per chip default");
 			break;
 		default:
-			seq_printf(m, "unknown reason");
+			seq_puts(m, "unknown reason");
 		}
-		seq_printf(m, "\n");
+		seq_putc(m, '\n');
 	}
 	return 0;
 }
@@ -1604,7 +1308,7 @@
 	int gpu_freq, ia_freq;
 
 	if (!(IS_GEN6(dev) || IS_GEN7(dev))) {
-		seq_printf(m, "unsupported on this chipset\n");
+		seq_puts(m, "unsupported on this chipset\n");
 		return 0;
 	}
 
@@ -1612,7 +1316,7 @@
 	if (ret)
 		return ret;
 
-	seq_printf(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
+	seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n");
 
 	for (gpu_freq = dev_priv->rps.min_delay;
 	     gpu_freq <= dev_priv->rps.max_delay;
@@ -1701,7 +1405,7 @@
 		   fb->base.bits_per_pixel,
 		   atomic_read(&fb->base.refcount.refcount));
 	describe_obj(m, fb->obj);
-	seq_printf(m, "\n");
+	seq_putc(m, '\n');
 	mutex_unlock(&dev->mode_config.mutex);
 
 	mutex_lock(&dev->mode_config.fb_lock);
@@ -1716,7 +1420,7 @@
 			   fb->base.bits_per_pixel,
 			   atomic_read(&fb->base.refcount.refcount));
 		describe_obj(m, fb->obj);
-		seq_printf(m, "\n");
+		seq_putc(m, '\n');
 	}
 	mutex_unlock(&dev->mode_config.fb_lock);
 
@@ -1736,22 +1440,22 @@
 		return ret;
 
 	if (dev_priv->ips.pwrctx) {
-		seq_printf(m, "power context ");
+		seq_puts(m, "power context ");
 		describe_obj(m, dev_priv->ips.pwrctx);
-		seq_printf(m, "\n");
+		seq_putc(m, '\n');
 	}
 
 	if (dev_priv->ips.renderctx) {
-		seq_printf(m, "render context ");
+		seq_puts(m, "render context ");
 		describe_obj(m, dev_priv->ips.renderctx);
-		seq_printf(m, "\n");
+		seq_putc(m, '\n');
 	}
 
 	for_each_ring(ring, dev_priv, i) {
 		if (ring->default_context) {
 			seq_printf(m, "HW default context %s ring ", ring->name);
 			describe_obj(m, ring->default_context->obj);
-			seq_printf(m, "\n");
+			seq_putc(m, '\n');
 		}
 	}
 
@@ -1767,9 +1471,9 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned forcewake_count;
 
-	spin_lock_irq(&dev_priv->gt_lock);
-	forcewake_count = dev_priv->forcewake_count;
-	spin_unlock_irq(&dev_priv->gt_lock);
+	spin_lock_irq(&dev_priv->uncore.lock);
+	forcewake_count = dev_priv->uncore.forcewake_count;
+	spin_unlock_irq(&dev_priv->uncore.lock);
 
 	seq_printf(m, "forcewake count = %u\n", forcewake_count);
 
@@ -1778,7 +1482,7 @@
 
 static const char *swizzle_string(unsigned swizzle)
 {
-	switch(swizzle) {
+	switch (swizzle) {
 	case I915_BIT_6_SWIZZLE_NONE:
 		return "none";
 	case I915_BIT_6_SWIZZLE_9:
@@ -1868,7 +1572,7 @@
 	if (dev_priv->mm.aliasing_ppgtt) {
 		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 
-		seq_printf(m, "aliasing PPGTT:\n");
+		seq_puts(m, "aliasing PPGTT:\n");
 		seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset);
 	}
 	seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK));
@@ -1886,7 +1590,7 @@
 
 
 	if (!IS_VALLEYVIEW(dev)) {
-		seq_printf(m, "unsupported\n");
+		seq_puts(m, "unsupported\n");
 		return 0;
 	}
 
@@ -1924,6 +1628,194 @@
 	return 0;
 }
 
+static int i915_llc(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* Size calculation for LLC is a bit of a pain. Ignore for now. */
+	seq_printf(m, "LLC: %s\n", yesno(HAS_LLC(dev)));
+	seq_printf(m, "eLLC: %zuMB\n", dev_priv->ellc_size);
+
+	return 0;
+}
+
+static int i915_edp_psr_status(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 psrstat, psrperf;
+
+	if (!IS_HASWELL(dev)) {
+		seq_puts(m, "PSR not supported on this platform\n");
+	} else if (IS_HASWELL(dev) && I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE) {
+		seq_puts(m, "PSR enabled\n");
+	} else {
+		seq_puts(m, "PSR disabled: ");
+		switch (dev_priv->no_psr_reason) {
+		case PSR_NO_SOURCE:
+			seq_puts(m, "not supported on this platform");
+			break;
+		case PSR_NO_SINK:
+			seq_puts(m, "not supported by panel");
+			break;
+		case PSR_MODULE_PARAM:
+			seq_puts(m, "disabled by flag");
+			break;
+		case PSR_CRTC_NOT_ACTIVE:
+			seq_puts(m, "crtc not active");
+			break;
+		case PSR_PWR_WELL_ENABLED:
+			seq_puts(m, "power well enabled");
+			break;
+		case PSR_NOT_TILED:
+			seq_puts(m, "not tiled");
+			break;
+		case PSR_SPRITE_ENABLED:
+			seq_puts(m, "sprite enabled");
+			break;
+		case PSR_S3D_ENABLED:
+			seq_puts(m, "stereo 3d enabled");
+			break;
+		case PSR_INTERLACED_ENABLED:
+			seq_puts(m, "interlaced enabled");
+			break;
+		case PSR_HSW_NOT_DDIA:
+			seq_puts(m, "HSW ties PSR to DDI A (eDP)");
+			break;
+		default:
+			seq_puts(m, "unknown reason");
+		}
+		seq_puts(m, "\n");
+		return 0;
+	}
+
+	psrstat = I915_READ(EDP_PSR_STATUS_CTL);
+
+	seq_puts(m, "PSR Current State: ");
+	switch (psrstat & EDP_PSR_STATUS_STATE_MASK) {
+	case EDP_PSR_STATUS_STATE_IDLE:
+		seq_puts(m, "Reset state\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDONACK:
+		seq_puts(m, "Wait for TG/Stream to send on frame of data after SRD conditions are met\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDENT:
+		seq_puts(m, "SRD entry\n");
+		break;
+	case EDP_PSR_STATUS_STATE_BUFOFF:
+		seq_puts(m, "Wait for buffer turn off\n");
+		break;
+	case EDP_PSR_STATUS_STATE_BUFON:
+		seq_puts(m, "Wait for buffer turn on\n");
+		break;
+	case EDP_PSR_STATUS_STATE_AUXACK:
+		seq_puts(m, "Wait for AUX to acknowledge on SRD exit\n");
+		break;
+	case EDP_PSR_STATUS_STATE_SRDOFFACK:
+		seq_puts(m, "Wait for TG/Stream to acknowledge the SRD VDM exit\n");
+		break;
+	default:
+		seq_puts(m, "Unknown\n");
+		break;
+	}
+
+	seq_puts(m, "Link Status: ");
+	switch (psrstat & EDP_PSR_STATUS_LINK_MASK) {
+	case EDP_PSR_STATUS_LINK_FULL_OFF:
+		seq_puts(m, "Link is fully off\n");
+		break;
+	case EDP_PSR_STATUS_LINK_FULL_ON:
+		seq_puts(m, "Link is fully on\n");
+		break;
+	case EDP_PSR_STATUS_LINK_STANDBY:
+		seq_puts(m, "Link is in standby\n");
+		break;
+	default:
+		seq_puts(m, "Unknown\n");
+		break;
+	}
+
+	seq_printf(m, "PSR Entry Count: %u\n",
+		   psrstat >> EDP_PSR_STATUS_COUNT_SHIFT &
+		   EDP_PSR_STATUS_COUNT_MASK);
+
+	seq_printf(m, "Max Sleep Timer Counter: %u\n",
+		   psrstat >> EDP_PSR_STATUS_MAX_SLEEP_TIMER_SHIFT &
+		   EDP_PSR_STATUS_MAX_SLEEP_TIMER_MASK);
+
+	seq_printf(m, "Had AUX error: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_AUX_ERROR));
+
+	seq_printf(m, "Sending AUX: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_AUX_SENDING));
+
+	seq_printf(m, "Sending Idle: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_IDLE));
+
+	seq_printf(m, "Sending TP2 TP3: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_TP2_TP3));
+
+	seq_printf(m, "Sending TP1: %s\n",
+		   yesno(psrstat & EDP_PSR_STATUS_SENDING_TP1));
+
+	seq_printf(m, "Idle Count: %u\n",
+		   psrstat & EDP_PSR_STATUS_IDLE_MASK);
+
+	psrperf = (I915_READ(EDP_PSR_PERF_CNT)) & EDP_PSR_PERF_CNT_MASK;
+	seq_printf(m, "Performance Counter: %u\n", psrperf);
+
+	return 0;
+}
+
+static int i915_energy_uJ(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u64 power;
+	u32 units;
+
+	if (INTEL_INFO(dev)->gen < 6)
+		return -ENODEV;
+
+	rdmsrl(MSR_RAPL_POWER_UNIT, power);
+	power = (power & 0x1f00) >> 8;
+	units = 1000000 / (1 << power); /* convert to uJ */
+	power = I915_READ(MCH_SECP_NRG_STTS);
+	power *= units;
+
+	seq_printf(m, "%llu", (long long unsigned)power);
+
+	return 0;
+}
+
+static int i915_pc8_status(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!IS_HASWELL(dev)) {
+		seq_puts(m, "not supported\n");
+		return 0;
+	}
+
+	mutex_lock(&dev_priv->pc8.lock);
+	seq_printf(m, "Requirements met: %s\n",
+		   yesno(dev_priv->pc8.requirements_met));
+	seq_printf(m, "GPU idle: %s\n", yesno(dev_priv->pc8.gpu_idle));
+	seq_printf(m, "Disable count: %d\n", dev_priv->pc8.disable_count);
+	seq_printf(m, "IRQs disabled: %s\n",
+		   yesno(dev_priv->pc8.irqs_disabled));
+	seq_printf(m, "Enabled: %s\n", yesno(dev_priv->pc8.enabled));
+	mutex_unlock(&dev_priv->pc8.lock);
+
+	return 0;
+}
+
 static int
 i915_wedged_get(void *data, u64 *val)
 {
@@ -2006,6 +1898,8 @@
 	struct drm_device *dev = data;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj, *next;
+	struct i915_address_space *vm;
+	struct i915_vma *vma, *x;
 	int ret;
 
 	DRM_DEBUG_DRIVER("Dropping caches: 0x%08llx\n", val);
@@ -2026,12 +1920,17 @@
 		i915_gem_retire_requests(dev);
 
 	if (val & DROP_BOUND) {
-		list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, mm_list)
-			if (obj->pin_count == 0) {
-				ret = i915_gem_object_unbind(obj);
+		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+			list_for_each_entry_safe(vma, x, &vm->inactive_list,
+						 mm_list) {
+				if (vma->obj->pin_count)
+					continue;
+
+				ret = i915_vma_unbind(vma);
 				if (ret)
 					goto unlock;
 			}
+		}
 	}
 
 	if (val & DROP_UNBOUND) {
@@ -2326,6 +2225,7 @@
 	{"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST},
 	{"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST},
 	{"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST},
+	{"i915_gem_stolen", i915_gem_stolen_list_info },
 	{"i915_gem_pageflip", i915_gem_pageflip_info, 0},
 	{"i915_gem_request", i915_gem_request_info, 0},
 	{"i915_gem_seqno", i915_gem_seqno_info, 0},
@@ -2353,64 +2253,42 @@
 	{"i915_swizzle_info", i915_swizzle_info, 0},
 	{"i915_ppgtt_info", i915_ppgtt_info, 0},
 	{"i915_dpio", i915_dpio_info, 0},
+	{"i915_llc", i915_llc, 0},
+	{"i915_edp_psr_status", i915_edp_psr_status, 0},
+	{"i915_energy_uJ", i915_energy_uJ, 0},
+	{"i915_pc8_status", i915_pc8_status, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
+static struct i915_debugfs_files {
+	const char *name;
+	const struct file_operations *fops;
+} i915_debugfs_files[] = {
+	{"i915_wedged", &i915_wedged_fops},
+	{"i915_max_freq", &i915_max_freq_fops},
+	{"i915_min_freq", &i915_min_freq_fops},
+	{"i915_cache_sharing", &i915_cache_sharing_fops},
+	{"i915_ring_stop", &i915_ring_stop_fops},
+	{"i915_gem_drop_caches", &i915_drop_caches_fops},
+	{"i915_error_state", &i915_error_state_fops},
+	{"i915_next_seqno", &i915_next_seqno_fops},
+};
+
 int i915_debugfs_init(struct drm_minor *minor)
 {
-	int ret;
-
-	ret = i915_debugfs_create(minor->debugfs_root, minor,
-				  "i915_wedged",
-				  &i915_wedged_fops);
-	if (ret)
-		return ret;
+	int ret, i;
 
 	ret = i915_forcewake_create(minor->debugfs_root, minor);
 	if (ret)
 		return ret;
 
-	ret = i915_debugfs_create(minor->debugfs_root, minor,
-				  "i915_max_freq",
-				  &i915_max_freq_fops);
-	if (ret)
-		return ret;
-
-	ret = i915_debugfs_create(minor->debugfs_root, minor,
-				  "i915_min_freq",
-				  &i915_min_freq_fops);
-	if (ret)
-		return ret;
-
-	ret = i915_debugfs_create(minor->debugfs_root, minor,
-				  "i915_cache_sharing",
-				  &i915_cache_sharing_fops);
-	if (ret)
-		return ret;
-
-	ret = i915_debugfs_create(minor->debugfs_root, minor,
-				  "i915_ring_stop",
-				  &i915_ring_stop_fops);
-	if (ret)
-		return ret;
-
-	ret = i915_debugfs_create(minor->debugfs_root, minor,
-				  "i915_gem_drop_caches",
-				  &i915_drop_caches_fops);
-	if (ret)
-		return ret;
-
-	ret = i915_debugfs_create(minor->debugfs_root, minor,
-				  "i915_error_state",
-				  &i915_error_state_fops);
-	if (ret)
-		return ret;
-
-	ret = i915_debugfs_create(minor->debugfs_root, minor,
-				 "i915_next_seqno",
-				 &i915_next_seqno_fops);
-	if (ret)
-		return ret;
+	for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) {
+		ret = i915_debugfs_create(minor->debugfs_root, minor,
+					  i915_debugfs_files[i].name,
+					  i915_debugfs_files[i].fops);
+		if (ret)
+			return ret;
+	}
 
 	return drm_debugfs_create_files(i915_debugfs_list,
 					I915_DEBUGFS_ENTRIES,
@@ -2419,26 +2297,18 @@
 
 void i915_debugfs_cleanup(struct drm_minor *minor)
 {
+	int i;
+
 	drm_debugfs_remove_files(i915_debugfs_list,
 				 I915_DEBUGFS_ENTRIES, minor);
 	drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops,
 				 1, minor);
-	drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops,
-				 1, minor);
-	drm_debugfs_remove_files((struct drm_info_list *) &i915_max_freq_fops,
-				 1, minor);
-	drm_debugfs_remove_files((struct drm_info_list *) &i915_min_freq_fops,
-				 1, minor);
-	drm_debugfs_remove_files((struct drm_info_list *) &i915_cache_sharing_fops,
-				 1, minor);
-	drm_debugfs_remove_files((struct drm_info_list *) &i915_drop_caches_fops,
-				 1, minor);
-	drm_debugfs_remove_files((struct drm_info_list *) &i915_ring_stop_fops,
-				 1, minor);
-	drm_debugfs_remove_files((struct drm_info_list *) &i915_error_state_fops,
-				 1, minor);
-	drm_debugfs_remove_files((struct drm_info_list *) &i915_next_seqno_fops,
-				 1, minor);
+	for (i = 0; i < ARRAY_SIZE(i915_debugfs_files); i++) {
+		struct drm_info_list *info_list =
+			(struct drm_info_list *) i915_debugfs_files[i].fops;
+
+		drm_debugfs_remove_files(info_list, 1, minor);
+	}
 }
 
 #endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index cf188ab..fdaa091 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -976,6 +976,9 @@
 	case I915_PARAM_HAS_LLC:
 		value = HAS_LLC(dev);
 		break;
+	case I915_PARAM_HAS_WT:
+		value = HAS_WT(dev);
+		break;
 	case I915_PARAM_HAS_ALIASING_PPGTT:
 		value = dev_priv->mm.aliasing_ppgtt ? 1 : 0;
 		break;
@@ -1293,7 +1296,7 @@
 
 	intel_register_dsm_handler();
 
-	ret = vga_switcheroo_register_client(dev->pdev, &i915_switcheroo_ops);
+	ret = vga_switcheroo_register_client(dev->pdev, &i915_switcheroo_ops, false);
 	if (ret)
 		goto cleanup_vga_client;
 
@@ -1323,10 +1326,8 @@
 	/* Always safe in the mode setting case. */
 	/* FIXME: do pre/post-mode set stuff in core KMS code */
 	dev->vblank_disable_allowed = 1;
-	if (INTEL_INFO(dev)->num_pipes == 0) {
-		dev_priv->mm.suspended = 0;
+	if (INTEL_INFO(dev)->num_pipes == 0)
 		return 0;
-	}
 
 	ret = intel_fbdev_init(dev);
 	if (ret)
@@ -1352,9 +1353,6 @@
 
 	drm_kms_helper_poll_init(dev);
 
-	/* We're off and running w/KMS */
-	dev_priv->mm.suspended = 0;
-
 	return 0;
 
 cleanup_gem:
@@ -1363,7 +1361,7 @@
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 	i915_gem_cleanup_aliasing_ppgtt(dev);
-	drm_mm_takedown(&dev_priv->mm.gtt_space);
+	drm_mm_takedown(&dev_priv->gtt.base.mm);
 cleanup_irq:
 	drm_irq_uninstall(dev);
 cleanup_gem_stolen:
@@ -1441,22 +1439,6 @@
 }
 
 /**
- * intel_early_sanitize_regs - clean up BIOS state
- * @dev: DRM device
- *
- * This function must be called before we do any I915_READ or I915_WRITE. Its
- * purpose is to clean up any state left by the BIOS that may affect us when
- * reading and/or writing registers.
- */
-static void intel_early_sanitize_regs(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (HAS_FPGA_DBG_UNCLAIMED(dev))
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-}
-
-/**
  * i915_driver_load - setup chip and create an initial config
  * @dev: DRM device
  * @flags: startup flags
@@ -1495,8 +1477,33 @@
 	dev_priv->dev = dev;
 	dev_priv->info = info;
 
+	spin_lock_init(&dev_priv->irq_lock);
+	spin_lock_init(&dev_priv->gpu_error.lock);
+	spin_lock_init(&dev_priv->backlight.lock);
+	spin_lock_init(&dev_priv->uncore.lock);
+	spin_lock_init(&dev_priv->mm.object_stat_lock);
+	mutex_init(&dev_priv->dpio_lock);
+	mutex_init(&dev_priv->rps.hw_lock);
+	mutex_init(&dev_priv->modeset_restore_lock);
+
+	mutex_init(&dev_priv->pc8.lock);
+	dev_priv->pc8.requirements_met = false;
+	dev_priv->pc8.gpu_idle = false;
+	dev_priv->pc8.irqs_disabled = false;
+	dev_priv->pc8.enabled = false;
+	dev_priv->pc8.disable_count = 2; /* requirements_met + gpu_idle */
+	INIT_DELAYED_WORK(&dev_priv->pc8.enable_work, hsw_enable_pc8_work);
+
 	i915_dump_device_info(dev_priv);
 
+	/* Not all pre-production machines fall into this category, only the
+	 * very first ones. Almost everything should work, except for maybe
+	 * suspend/resume. And we don't implement workarounds that affect only
+	 * pre-production machines. */
+	if (IS_HSW_EARLY_SDV(dev))
+		DRM_INFO("This is an early pre-production Haswell machine. "
+			 "It may not be fully functional.\n");
+
 	if (i915_get_bridge_dev(dev)) {
 		ret = -EIO;
 		goto free_priv;
@@ -1522,7 +1529,17 @@
 		goto put_bridge;
 	}
 
-	intel_early_sanitize_regs(dev);
+	intel_uncore_early_sanitize(dev);
+
+	if (IS_HASWELL(dev) && (I915_READ(HSW_EDRAM_PRESENT) == 1)) {
+		/* The docs do not explain exactly how the calculation can be
+		 * made. It is somewhat guessable, but for now, it's always
+		 * 128MB.
+		 * NB: We can't write IDICR yet because we do not have gt funcs
+		 * set up */
+		dev_priv->ellc_size = 128;
+		DRM_INFO("Found %zuMB of eLLC\n", dev_priv->ellc_size);
+	}
 
 	ret = i915_gem_gtt_init(dev);
 	if (ret)
@@ -1558,8 +1575,8 @@
 		goto out_rmmap;
 	}
 
-	dev_priv->mm.gtt_mtrr = arch_phys_wc_add(dev_priv->gtt.mappable_base,
-						 aperture_size);
+	dev_priv->gtt.mtrr = arch_phys_wc_add(dev_priv->gtt.mappable_base,
+					      aperture_size);
 
 	/* The i915 workqueue is primarily used for batched retirement of
 	 * requests (and thus managing bo) once the task has been completed
@@ -1585,7 +1602,9 @@
 	intel_detect_pch(dev);
 
 	intel_irq_init(dev);
-	intel_gt_init(dev);
+	intel_pm_init(dev);
+	intel_uncore_sanitize(dev);
+	intel_uncore_init(dev);
 
 	/* Try to make sure MCHBAR is enabled before poking at it */
 	intel_setup_mchbar(dev);
@@ -1610,15 +1629,6 @@
 	if (!IS_I945G(dev) && !IS_I945GM(dev))
 		pci_enable_msi(dev->pdev);
 
-	spin_lock_init(&dev_priv->irq_lock);
-	spin_lock_init(&dev_priv->gpu_error.lock);
-	spin_lock_init(&dev_priv->rps.lock);
-	spin_lock_init(&dev_priv->backlight.lock);
-	mutex_init(&dev_priv->dpio_lock);
-
-	mutex_init(&dev_priv->rps.hw_lock);
-	mutex_init(&dev_priv->modeset_restore_lock);
-
 	dev_priv->num_plane = 1;
 	if (IS_VALLEYVIEW(dev))
 		dev_priv->num_plane = 2;
@@ -1629,9 +1639,6 @@
 			goto out_gem_unload;
 	}
 
-	/* Start out suspended */
-	dev_priv->mm.suspended = 1;
-
 	if (HAS_POWER_WELL(dev))
 		i915_init_power_well(dev);
 
@@ -1641,6 +1648,9 @@
 			DRM_ERROR("failed to init modeset\n");
 			goto out_gem_unload;
 		}
+	} else {
+		/* Start out suspended in ums mode. */
+		dev_priv->ums.mm_suspended = 1;
 	}
 
 	i915_setup_sysfs(dev);
@@ -1648,7 +1658,7 @@
 	if (INTEL_INFO(dev)->num_pipes) {
 		/* Must be done after probing outputs */
 		intel_opregion_init(dev);
-		acpi_video_register_with_quirks();
+		acpi_video_register();
 	}
 
 	if (IS_GEN5(dev))
@@ -1667,9 +1677,9 @@
 	intel_teardown_mchbar(dev);
 	destroy_workqueue(dev_priv->wq);
 out_mtrrfree:
-	arch_phys_wc_del(dev_priv->mm.gtt_mtrr);
+	arch_phys_wc_del(dev_priv->gtt.mtrr);
 	io_mapping_free(dev_priv->gtt.mappable);
-	dev_priv->gtt.gtt_remove(dev);
+	dev_priv->gtt.base.cleanup(&dev_priv->gtt.base);
 out_rmmap:
 	pci_iounmap(dev->pdev, dev_priv->regs);
 put_bridge:
@@ -1686,8 +1696,13 @@
 
 	intel_gpu_ips_teardown();
 
-	if (HAS_POWER_WELL(dev))
+	if (HAS_POWER_WELL(dev)) {
+		/* The i915.ko module is still not prepared to be loaded when
+		 * the power well is not enabled, so just enable it in case
+		 * we're going to unload/reload. */
+		intel_set_power_well(dev, true);
 		i915_remove_power_well(dev);
+	}
 
 	i915_teardown_sysfs(dev);
 
@@ -1705,7 +1720,7 @@
 	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
 
 	io_mapping_free(dev_priv->gtt.mappable);
-	arch_phys_wc_del(dev_priv->mm.gtt_mtrr);
+	arch_phys_wc_del(dev_priv->gtt.mtrr);
 
 	acpi_video_unregister();
 
@@ -1733,6 +1748,8 @@
 	cancel_work_sync(&dev_priv->gpu_error.work);
 	i915_destroy_error_state(dev);
 
+	cancel_delayed_work_sync(&dev_priv->pc8.enable_work);
+
 	if (dev->pdev->msi_enabled)
 		pci_disable_msi(dev->pdev);
 
@@ -1754,7 +1771,9 @@
 			i915_free_hws(dev);
 	}
 
-	drm_mm_takedown(&dev_priv->mm.gtt_space);
+	list_del(&dev_priv->gtt.base.global_link);
+	WARN_ON(!list_empty(&dev_priv->vm_list));
+	drm_mm_takedown(&dev_priv->gtt.base.mm);
 	if (dev_priv->regs != NULL)
 		pci_iounmap(dev->pdev, dev_priv->regs);
 
@@ -1764,7 +1783,7 @@
 	destroy_workqueue(dev_priv->wq);
 	pm_qos_remove_request(&dev_priv->pm_qos);
 
-	dev_priv->gtt.gtt_remove(dev);
+	dev_priv->gtt.base.cleanup(&dev_priv->gtt.base);
 
 	if (dev_priv->slab)
 		kmem_cache_destroy(dev_priv->slab);
@@ -1840,14 +1859,14 @@
 	kfree(file_priv);
 }
 
-struct drm_ioctl_desc i915_ioctls[] = {
+const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_INIT, i915_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_FLUSH, i915_flush_ioctl, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(I915_FLIP, i915_flip_bufs, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(I915_BATCHBUFFER, i915_batchbuffer, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(I915_IRQ_EMIT, i915_irq_emit, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(I915_IRQ_WAIT, i915_irq_wait, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam, DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(I915_GETPARAM, i915_getparam, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_SETPARAM, i915_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_ALLOC, drm_noop, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(I915_FREE, drm_noop, DRM_AUTH),
@@ -1860,35 +1879,35 @@
 	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHING, i915_gem_set_caching_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHING, i915_gem_get_caching_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SET_CACHING, i915_gem_set_caching_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_GET_CACHING, i915_gem_get_caching_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_THROTTLE, i915_gem_throttle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_ENTERVT, i915_gem_entervt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GEM_LEAVEVT, i915_gem_leavevt_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_CREATE, i915_gem_create_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_PREAD, i915_gem_pread_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_PWRITE, i915_gem_pwrite_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP, i915_gem_mmap_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_MMAP_GTT, i915_gem_mmap_gtt_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SET_DOMAIN, i915_gem_set_domain_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SW_FINISH, i915_gem_sw_finish_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_SET_TILING, i915_gem_set_tiling, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_GET_TILING, i915_gem_get_tiling, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, intel_sprite_get_colorkey, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_REG_READ, i915_reg_read_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 
 int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f4af1ca..ccb28ea 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -118,10 +118,14 @@
 MODULE_PARM_DESC(i915_enable_ppgtt,
 		"Enable PPGTT (default: true)");
 
-unsigned int i915_preliminary_hw_support __read_mostly = 0;
+int i915_enable_psr __read_mostly = 0;
+module_param_named(enable_psr, i915_enable_psr, int, 0600);
+MODULE_PARM_DESC(enable_psr, "Enable PSR (default: false)");
+
+unsigned int i915_preliminary_hw_support __read_mostly = IS_ENABLED(CONFIG_DRM_I915_PRELIMINARY_HW_SUPPORT);
 module_param_named(preliminary_hw_support, i915_preliminary_hw_support, int, 0600);
 MODULE_PARM_DESC(preliminary_hw_support,
-		"Enable preliminary hardware support. (default: false)");
+		"Enable preliminary hardware support.");
 
 int i915_disable_power_well __read_mostly = 1;
 module_param_named(disable_power_well, i915_disable_power_well, int, 0600);
@@ -132,6 +136,24 @@
 module_param_named(enable_ips, i915_enable_ips, int, 0600);
 MODULE_PARM_DESC(enable_ips, "Enable IPS (default: true)");
 
+bool i915_fastboot __read_mostly = 0;
+module_param_named(fastboot, i915_fastboot, bool, 0600);
+MODULE_PARM_DESC(fastboot, "Try to skip unnecessary mode sets at boot time "
+		 "(default: false)");
+
+int i915_enable_pc8 __read_mostly = 1;
+module_param_named(enable_pc8, i915_enable_pc8, int, 0600);
+MODULE_PARM_DESC(enable_pc8, "Enable support for low power package C states (PC8+) (default: true)");
+
+int i915_pc8_timeout __read_mostly = 5000;
+module_param_named(pc8_timeout, i915_pc8_timeout, int, 0600);
+MODULE_PARM_DESC(pc8_timeout, "Number of msecs of idleness required to enter PC8+ (default: 5000)");
+
+bool i915_prefault_disable __read_mostly;
+module_param_named(prefault_disable, i915_prefault_disable, bool, 0600);
+MODULE_PARM_DESC(prefault_disable,
+		"Disable page prefaulting for pread/pwrite/reloc (default:false). For developers only.");
+
 static struct drm_driver driver;
 extern int intel_agp_enabled;
 
@@ -543,6 +565,9 @@
 	dev_priv->modeset_restore = MODESET_SUSPENDED;
 	mutex_unlock(&dev_priv->modeset_restore_lock);
 
+	/* We do a lot of poking in a lot of registers, make sure they work
+	 * properly. */
+	hsw_disable_package_c8(dev_priv);
 	intel_set_power_well(dev, true);
 
 	drm_kms_helper_poll_disable(dev);
@@ -551,7 +576,11 @@
 
 	/* If KMS is active, we do the leavevt stuff here */
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		int error = i915_gem_idle(dev);
+		int error;
+
+		mutex_lock(&dev->struct_mutex);
+		error = i915_gem_idle(dev);
+		mutex_unlock(&dev->struct_mutex);
 		if (error) {
 			dev_err(&dev->pdev->dev,
 				"GEM idle failed, resume might fail\n");
@@ -656,7 +685,6 @@
 		intel_init_pch_refclk(dev);
 
 		mutex_lock(&dev->struct_mutex);
-		dev_priv->mm.suspended = 0;
 
 		error = i915_gem_init_hw(dev);
 		mutex_unlock(&dev->struct_mutex);
@@ -696,6 +724,10 @@
 		schedule_work(&dev_priv->console_resume_work);
 	}
 
+	/* Undo what we did at i915_drm_freeze so the refcount goes back to the
+	 * expected level. */
+	hsw_enable_package_c8(dev_priv);
+
 	mutex_lock(&dev_priv->modeset_restore_lock);
 	dev_priv->modeset_restore = MODESET_DONE;
 	mutex_unlock(&dev_priv->modeset_restore_lock);
@@ -706,7 +738,7 @@
 {
 	int error = 0;
 
-	intel_gt_reset(dev);
+	intel_uncore_sanitize(dev);
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		mutex_lock(&dev->struct_mutex);
@@ -732,7 +764,7 @@
 
 	pci_set_master(dev->pdev);
 
-	intel_gt_reset(dev);
+	intel_uncore_sanitize(dev);
 
 	/*
 	 * Platforms with opregion should have sane BIOS, older ones (gen3 and
@@ -753,139 +785,6 @@
 	return 0;
 }
 
-static int i8xx_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (IS_I85X(dev))
-		return -ENODEV;
-
-	I915_WRITE(D_STATE, I915_READ(D_STATE) | DSTATE_GFX_RESET_I830);
-	POSTING_READ(D_STATE);
-
-	if (IS_I830(dev) || IS_845G(dev)) {
-		I915_WRITE(DEBUG_RESET_I830,
-			   DEBUG_RESET_DISPLAY |
-			   DEBUG_RESET_RENDER |
-			   DEBUG_RESET_FULL);
-		POSTING_READ(DEBUG_RESET_I830);
-		msleep(1);
-
-		I915_WRITE(DEBUG_RESET_I830, 0);
-		POSTING_READ(DEBUG_RESET_I830);
-	}
-
-	msleep(1);
-
-	I915_WRITE(D_STATE, I915_READ(D_STATE) & ~DSTATE_GFX_RESET_I830);
-	POSTING_READ(D_STATE);
-
-	return 0;
-}
-
-static int i965_reset_complete(struct drm_device *dev)
-{
-	u8 gdrst;
-	pci_read_config_byte(dev->pdev, I965_GDRST, &gdrst);
-	return (gdrst & GRDOM_RESET_ENABLE) == 0;
-}
-
-static int i965_do_reset(struct drm_device *dev)
-{
-	int ret;
-	u8 gdrst;
-
-	/*
-	 * Set the domains we want to reset (GRDOM/bits 2 and 3) as
-	 * well as the reset bit (GR/bit 0).  Setting the GR bit
-	 * triggers the reset; when done, the hardware will clear it.
-	 */
-	pci_read_config_byte(dev->pdev, I965_GDRST, &gdrst);
-	pci_write_config_byte(dev->pdev, I965_GDRST,
-			      gdrst | GRDOM_RENDER |
-			      GRDOM_RESET_ENABLE);
-	ret =  wait_for(i965_reset_complete(dev), 500);
-	if (ret)
-		return ret;
-
-	/* We can't reset render&media without also resetting display ... */
-	pci_read_config_byte(dev->pdev, I965_GDRST, &gdrst);
-	pci_write_config_byte(dev->pdev, I965_GDRST,
-			      gdrst | GRDOM_MEDIA |
-			      GRDOM_RESET_ENABLE);
-
-	return wait_for(i965_reset_complete(dev), 500);
-}
-
-static int ironlake_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	u32 gdrst;
-	int ret;
-
-	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
-	gdrst &= ~GRDOM_MASK;
-	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
-		   gdrst | GRDOM_RENDER | GRDOM_RESET_ENABLE);
-	ret = wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
-	if (ret)
-		return ret;
-
-	/* We can't reset render&media without also resetting display ... */
-	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
-	gdrst &= ~GRDOM_MASK;
-	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
-		   gdrst | GRDOM_MEDIA | GRDOM_RESET_ENABLE);
-	return wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
-}
-
-static int gen6_do_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int	ret;
-	unsigned long irqflags;
-
-	/* Hold gt_lock across reset to prevent any register access
-	 * with forcewake not set correctly
-	 */
-	spin_lock_irqsave(&dev_priv->gt_lock, irqflags);
-
-	/* Reset the chip */
-
-	/* GEN6_GDRST is not in the gt power well, no need to check
-	 * for fifo space for the write or forcewake the chip for
-	 * the read
-	 */
-	I915_WRITE_NOTRACE(GEN6_GDRST, GEN6_GRDOM_FULL);
-
-	/* Spin waiting for the device to ack the reset request */
-	ret = wait_for((I915_READ_NOTRACE(GEN6_GDRST) & GEN6_GRDOM_FULL) == 0, 500);
-
-	/* If reset with a user forcewake, try to restore, otherwise turn it off */
-	if (dev_priv->forcewake_count)
-		dev_priv->gt.force_wake_get(dev_priv);
-	else
-		dev_priv->gt.force_wake_put(dev_priv);
-
-	/* Restore fifo count */
-	dev_priv->gt_fifo_count = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
-
-	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
-	return ret;
-}
-
-int intel_gpu_reset(struct drm_device *dev)
-{
-	switch (INTEL_INFO(dev)->gen) {
-	case 7:
-	case 6: return gen6_do_reset(dev);
-	case 5: return ironlake_do_reset(dev);
-	case 4: return i965_do_reset(dev);
-	case 2: return i8xx_do_reset(dev);
-	default: return -ENODEV;
-	}
-}
-
 /**
  * i915_reset - reset chip after a hang
  * @dev: drm device to reset
@@ -955,11 +854,11 @@
 	 * switched away).
 	 */
 	if (drm_core_check_feature(dev, DRIVER_MODESET) ||
-			!dev_priv->mm.suspended) {
+			!dev_priv->ums.mm_suspended) {
 		struct intel_ring_buffer *ring;
 		int i;
 
-		dev_priv->mm.suspended = 0;
+		dev_priv->ums.mm_suspended = 0;
 
 		i915_gem_init_swizzling(dev);
 
@@ -1110,7 +1009,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_gem_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 	.read = drm_read,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = i915_compat_ioctl,
@@ -1123,8 +1021,9 @@
 	 * deal with them for Intel hardware.
 	 */
 	.driver_features =
-	    DRIVER_USE_AGP | DRIVER_REQUIRE_AGP | /* DRIVER_USE_MTRR |*/
-	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME,
+	    DRIVER_USE_AGP | DRIVER_REQUIRE_AGP |
+	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME |
+	    DRIVER_RENDER,
 	.load = i915_driver_load,
 	.unload = i915_driver_unload,
 	.open = i915_driver_open,
@@ -1154,7 +1053,7 @@
 
 	.dumb_create = i915_gem_dumb_create,
 	.dumb_map_offset = i915_gem_mmap_gtt,
-	.dumb_destroy = i915_gem_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 	.ioctls = i915_ioctls,
 	.fops = &i915_driver_fops,
 	.name = DRIVER_NAME,
@@ -1215,133 +1114,3 @@
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
-
-/* We give fast paths for the really cool registers */
-#define NEEDS_FORCE_WAKE(dev_priv, reg) \
-	((HAS_FORCE_WAKE((dev_priv)->dev)) && \
-	 ((reg) < 0x40000) &&            \
-	 ((reg) != FORCEWAKE))
-static void
-ilk_dummy_write(struct drm_i915_private *dev_priv)
-{
-	/* WaIssueDummyWriteToWakeupFromRC6:ilk Issue a dummy write to wake up
-	 * the chip from rc6 before touching it for real. MI_MODE is masked,
-	 * hence harmless to write 0 into. */
-	I915_WRITE_NOTRACE(MI_MODE, 0);
-}
-
-static void
-hsw_unclaimed_reg_clear(struct drm_i915_private *dev_priv, u32 reg)
-{
-	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unknown unclaimed register before writing to %x\n",
-			  reg);
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
-}
-
-static void
-hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
-{
-	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unclaimed write to %x\n", reg);
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
-}
-
-#define __i915_read(x, y) \
-u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = 0; \
-	if (IS_GEN5(dev_priv->dev)) \
-		ilk_dummy_write(dev_priv); \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		unsigned long irqflags; \
-		spin_lock_irqsave(&dev_priv->gt_lock, irqflags); \
-		if (dev_priv->forcewake_count == 0) \
-			dev_priv->gt.force_wake_get(dev_priv); \
-		val = read##y(dev_priv->regs + reg); \
-		if (dev_priv->forcewake_count == 0) \
-			dev_priv->gt.force_wake_put(dev_priv); \
-		spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags); \
-	} else { \
-		val = read##y(dev_priv->regs + reg); \
-	} \
-	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
-	return val; \
-}
-
-__i915_read(8, b)
-__i915_read(16, w)
-__i915_read(32, l)
-__i915_read(64, q)
-#undef __i915_read
-
-#define __i915_write(x, y) \
-void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
-	u32 __fifo_ret = 0; \
-	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
-	} \
-	if (IS_GEN5(dev_priv->dev)) \
-		ilk_dummy_write(dev_priv); \
-	hsw_unclaimed_reg_clear(dev_priv, reg); \
-	write##y(val, dev_priv->regs + reg); \
-	if (unlikely(__fifo_ret)) { \
-		gen6_gt_check_fifodbg(dev_priv); \
-	} \
-	hsw_unclaimed_reg_check(dev_priv, reg); \
-}
-__i915_write(8, b)
-__i915_write(16, w)
-__i915_write(32, l)
-__i915_write(64, q)
-#undef __i915_write
-
-static const struct register_whitelist {
-	uint64_t offset;
-	uint32_t size;
-	uint32_t gen_bitmask; /* support gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
-} whitelist[] = {
-	{ RING_TIMESTAMP(RENDER_RING_BASE), 8, 0xF0 },
-};
-
-int i915_reg_read_ioctl(struct drm_device *dev,
-			void *data, struct drm_file *file)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_reg_read *reg = data;
-	struct register_whitelist const *entry = whitelist;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
-		if (entry->offset == reg->offset &&
-		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
-			break;
-	}
-
-	if (i == ARRAY_SIZE(whitelist))
-		return -EINVAL;
-
-	switch (entry->size) {
-	case 8:
-		reg->val = I915_READ64(reg->offset);
-		break;
-	case 4:
-		reg->val = I915_READ(reg->offset);
-		break;
-	case 2:
-		reg->val = I915_READ16(reg->offset);
-		break;
-	case 1:
-		reg->val = I915_READ8(reg->offset);
-		break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a416645..52a3785 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -144,6 +144,7 @@
 
 struct intel_dpll_hw_state {
 	uint32_t dpll;
+	uint32_t dpll_md;
 	uint32_t fp0;
 	uint32_t fp1;
 };
@@ -156,6 +157,8 @@
 	/* should match the index in the dev_priv->shared_dplls array */
 	enum intel_dpll_id id;
 	struct intel_dpll_hw_state hw_state;
+	void (*mode_set)(struct drm_i915_private *dev_priv,
+			 struct intel_shared_dpll *pll);
 	void (*enable)(struct drm_i915_private *dev_priv,
 		       struct intel_shared_dpll *pll);
 	void (*disable)(struct drm_i915_private *dev_priv,
@@ -198,7 +201,6 @@
 #define DRIVER_MINOR		6
 #define DRIVER_PATCHLEVEL	0
 
-#define WATCH_COHERENCY	0
 #define WATCH_LISTS	0
 #define WATCH_GTT	0
 
@@ -320,8 +322,8 @@
 		u32 purgeable:1;
 		s32 ring:4;
 		u32 cache_level:2;
-	} *active_bo, *pinned_bo;
-	u32 active_bo_count, pinned_bo_count;
+	} **active_bo, **pinned_bo;
+	u32 *active_bo_count, *pinned_bo_count;
 	struct intel_overlay_error_state *overlay;
 	struct intel_display_error_state *display;
 };
@@ -356,14 +358,16 @@
 			  struct dpll *match_clock,
 			  struct dpll *best_clock);
 	void (*update_wm)(struct drm_device *dev);
-	void (*update_sprite_wm)(struct drm_device *dev, int pipe,
+	void (*update_sprite_wm)(struct drm_plane *plane,
+				 struct drm_crtc *crtc,
 				 uint32_t sprite_width, int pixel_size,
-				 bool enable);
+				 bool enable, bool scaled);
 	void (*modeset_global_resources)(struct drm_device *dev);
 	/* Returns the active state of the crtc, and if the crtc is active,
 	 * fills out the pipe-config with the hw state. */
 	bool (*get_pipe_config)(struct intel_crtc *,
 				struct intel_crtc_config *);
+	void (*get_clock)(struct intel_crtc *, struct intel_crtc_config *);
 	int (*crtc_mode_set)(struct drm_crtc *crtc,
 			     int x, int y,
 			     struct drm_framebuffer *old_fb);
@@ -376,7 +380,8 @@
 	void (*init_clock_gating)(struct drm_device *dev);
 	int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc,
 			  struct drm_framebuffer *fb,
-			  struct drm_i915_gem_object *obj);
+			  struct drm_i915_gem_object *obj,
+			  uint32_t flags);
 	int (*update_plane)(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 			    int x, int y);
 	void (*hpd_irq_setup)(struct drm_device *dev);
@@ -387,11 +392,20 @@
 	/* pll clock increase/decrease */
 };
 
-struct drm_i915_gt_funcs {
+struct intel_uncore_funcs {
 	void (*force_wake_get)(struct drm_i915_private *dev_priv);
 	void (*force_wake_put)(struct drm_i915_private *dev_priv);
 };
 
+struct intel_uncore {
+	spinlock_t lock; /** lock is also taken in irq contexts. */
+
+	struct intel_uncore_funcs funcs;
+
+	unsigned fifo_count;
+	unsigned forcewake_count;
+};
+
 #define DEV_INFO_FOR_EACH_FLAG(func, sep) \
 	func(is_mobile) sep \
 	func(is_i85x) sep \
@@ -436,12 +450,64 @@
 
 enum i915_cache_level {
 	I915_CACHE_NONE = 0,
-	I915_CACHE_LLC,
-	I915_CACHE_LLC_MLC, /* gen6+, in docs at least! */
+	I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */
+	I915_CACHE_L3_LLC, /* gen7+, L3 sits between the domain specifc
+			      caches, eg sampler/render caches, and the
+			      large Last-Level-Cache. LLC is coherent with
+			      the CPU, but L3 is only visible to the GPU. */
+	I915_CACHE_WT, /* hsw:gt3e WriteThrough for scanouts */
 };
 
 typedef uint32_t gen6_gtt_pte_t;
 
+struct i915_address_space {
+	struct drm_mm mm;
+	struct drm_device *dev;
+	struct list_head global_link;
+	unsigned long start;		/* Start offset always 0 for dri2 */
+	size_t total;		/* size addr space maps (ex. 2GB for ggtt) */
+
+	struct {
+		dma_addr_t addr;
+		struct page *page;
+	} scratch;
+
+	/**
+	 * List of objects currently involved in rendering.
+	 *
+	 * Includes buffers having the contents of their GPU caches
+	 * flushed, not necessarily primitives.  last_rendering_seqno
+	 * represents when the rendering involved will be completed.
+	 *
+	 * A reference is held on the buffer while on this list.
+	 */
+	struct list_head active_list;
+
+	/**
+	 * LRU list of objects which are not in the ringbuffer and
+	 * are ready to unbind, but are still in the GTT.
+	 *
+	 * last_rendering_seqno is 0 while an object is in this list.
+	 *
+	 * A reference is not held on the buffer while on this list,
+	 * as merely being GTT-bound shouldn't prevent its being
+	 * freed, and we'll pull it off the list in the free path.
+	 */
+	struct list_head inactive_list;
+
+	/* FIXME: Need a more generic return type */
+	gen6_gtt_pte_t (*pte_encode)(dma_addr_t addr,
+				     enum i915_cache_level level);
+	void (*clear_range)(struct i915_address_space *vm,
+			    unsigned int first_entry,
+			    unsigned int num_entries);
+	void (*insert_entries)(struct i915_address_space *vm,
+			       struct sg_table *st,
+			       unsigned int first_entry,
+			       enum i915_cache_level cache_level);
+	void (*cleanup)(struct i915_address_space *vm);
+};
+
 /* The Graphics Translation Table is the way in which GEN hardware translates a
  * Graphics Virtual Address into a Physical Address. In addition to the normal
  * collateral associated with any va->pa translations GEN hardware also has a
@@ -450,8 +516,7 @@
  * the spec.
  */
 struct i915_gtt {
-	unsigned long start;		/* Start offset of used GTT */
-	size_t total;			/* Total size GTT can map */
+	struct i915_address_space base;
 	size_t stolen_size;		/* Total size of stolen memory */
 
 	unsigned long mappable_end;	/* End offset that we can CPU map */
@@ -462,50 +527,47 @@
 	void __iomem *gsm;
 
 	bool do_idle_maps;
-	dma_addr_t scratch_page_dma;
-	struct page *scratch_page;
+
+	int mtrr;
 
 	/* global gtt ops */
 	int (*gtt_probe)(struct drm_device *dev, size_t *gtt_total,
 			  size_t *stolen, phys_addr_t *mappable_base,
 			  unsigned long *mappable_end);
-	void (*gtt_remove)(struct drm_device *dev);
-	void (*gtt_clear_range)(struct drm_device *dev,
-				unsigned int first_entry,
-				unsigned int num_entries);
-	void (*gtt_insert_entries)(struct drm_device *dev,
-				   struct sg_table *st,
-				   unsigned int pg_start,
-				   enum i915_cache_level cache_level);
-	gen6_gtt_pte_t (*pte_encode)(struct drm_device *dev,
-				     dma_addr_t addr,
-				     enum i915_cache_level level);
 };
-#define gtt_total_entries(gtt) ((gtt).total >> PAGE_SHIFT)
+#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
 
-#define I915_PPGTT_PD_ENTRIES 512
-#define I915_PPGTT_PT_ENTRIES 1024
 struct i915_hw_ppgtt {
-	struct drm_device *dev;
+	struct i915_address_space base;
 	unsigned num_pd_entries;
 	struct page **pt_pages;
 	uint32_t pd_offset;
 	dma_addr_t *pt_dma_addr;
-	dma_addr_t scratch_page_dma_addr;
 
-	/* pte functions, mirroring the interface of the global gtt. */
-	void (*clear_range)(struct i915_hw_ppgtt *ppgtt,
-			    unsigned int first_entry,
-			    unsigned int num_entries);
-	void (*insert_entries)(struct i915_hw_ppgtt *ppgtt,
-			       struct sg_table *st,
-			       unsigned int pg_start,
-			       enum i915_cache_level cache_level);
-	gen6_gtt_pte_t (*pte_encode)(struct drm_device *dev,
-				     dma_addr_t addr,
-				     enum i915_cache_level level);
 	int (*enable)(struct drm_device *dev);
-	void (*cleanup)(struct i915_hw_ppgtt *ppgtt);
+};
+
+/**
+ * A VMA represents a GEM BO that is bound into an address space. Therefore, a
+ * VMA's presence cannot be guaranteed before binding, or after unbinding the
+ * object into/from the address space.
+ *
+ * To make things as simple as possible (ie. no refcounting), a VMA's lifetime
+ * will always be <= an objects lifetime. So object refcounting should cover us.
+ */
+struct i915_vma {
+	struct drm_mm_node node;
+	struct drm_i915_gem_object *obj;
+	struct i915_address_space *vm;
+
+	/** This object's place on the active/inactive lists */
+	struct list_head mm_list;
+
+	struct list_head vma_link; /* Link in the object's VMA list */
+
+	/** This vma's place in the batchbuffer or on the eviction list */
+	struct list_head exec_list;
+
 };
 
 struct i915_ctx_hang_stats {
@@ -528,15 +590,48 @@
 	struct i915_ctx_hang_stats hang_stats;
 };
 
-enum no_fbc_reason {
-	FBC_NO_OUTPUT, /* no outputs enabled to compress */
-	FBC_STOLEN_TOO_SMALL, /* not enough space to hold compressed buffers */
-	FBC_UNSUPPORTED_MODE, /* interlace or doublescanned mode */
-	FBC_MODE_TOO_LARGE, /* mode too large for compression */
-	FBC_BAD_PLANE, /* fbc not supported on plane */
-	FBC_NOT_TILED, /* buffer not tiled */
-	FBC_MULTIPLE_PIPES, /* more than one pipe active */
-	FBC_MODULE_PARAM,
+struct i915_fbc {
+	unsigned long size;
+	unsigned int fb_id;
+	enum plane plane;
+	int y;
+
+	struct drm_mm_node *compressed_fb;
+	struct drm_mm_node *compressed_llb;
+
+	struct intel_fbc_work {
+		struct delayed_work work;
+		struct drm_crtc *crtc;
+		struct drm_framebuffer *fb;
+		int interval;
+	} *fbc_work;
+
+	enum no_fbc_reason {
+		FBC_OK, /* FBC is enabled */
+		FBC_UNSUPPORTED, /* FBC is not supported by this chipset */
+		FBC_NO_OUTPUT, /* no outputs enabled to compress */
+		FBC_STOLEN_TOO_SMALL, /* not enough space for buffers */
+		FBC_UNSUPPORTED_MODE, /* interlace or doublescanned mode */
+		FBC_MODE_TOO_LARGE, /* mode too large for compression */
+		FBC_BAD_PLANE, /* fbc not supported on plane */
+		FBC_NOT_TILED, /* buffer not tiled */
+		FBC_MULTIPLE_PIPES, /* more than one pipe active */
+		FBC_MODULE_PARAM,
+		FBC_CHIP_DEFAULT, /* disabled by default on this chip */
+	} no_fbc_reason;
+};
+
+enum no_psr_reason {
+	PSR_NO_SOURCE, /* Not supported on platform */
+	PSR_NO_SINK, /* Not supported by panel */
+	PSR_MODULE_PARAM,
+	PSR_CRTC_NOT_ACTIVE,
+	PSR_PWR_WELL_ENABLED,
+	PSR_NOT_TILED,
+	PSR_SPRITE_ENABLED,
+	PSR_S3D_ENABLED,
+	PSR_INTERLACED_ENABLED,
+	PSR_HSW_NOT_DDIA,
 };
 
 enum intel_pch {
@@ -555,6 +650,7 @@
 #define QUIRK_PIPEA_FORCE (1<<0)
 #define QUIRK_LVDS_SSC_DISABLE (1<<1)
 #define QUIRK_INVERT_BRIGHTNESS (1<<2)
+#define QUIRK_NO_PCH_PWM_ENABLE (1<<3)
 
 struct intel_fbdev;
 struct intel_fbc_work;
@@ -721,12 +817,12 @@
 };
 
 struct intel_gen6_power_mgmt {
+	/* work and pm_iir are protected by dev_priv->irq_lock */
 	struct work_struct work;
-	struct delayed_work vlv_work;
 	u32 pm_iir;
-	/* lock - irqsave spinlock that protectects the work_struct and
-	 * pm_iir. */
-	spinlock_t lock;
+
+	/* On vlv we need to manually drop to Vmin with a delayed work. */
+	struct delayed_work vlv_work;
 
 	/* The below variables an all the rps hw state are protected by
 	 * dev->struct mutext. */
@@ -792,6 +888,18 @@
 	uint32_t counter;
 };
 
+struct i915_ums_state {
+	/**
+	 * Flag if the X Server, and thus DRM, is not currently in
+	 * control of the device.
+	 *
+	 * This is set between LeaveVT and EnterVT.  It needs to be
+	 * replaced with a semaphore.  It also needs to be
+	 * transitioned away from for kernel modesetting.
+	 */
+	int mm_suspended;
+};
+
 struct intel_l3_parity {
 	u32 *remap_info;
 	struct work_struct error_work;
@@ -800,8 +908,6 @@
 struct i915_gem_mm {
 	/** Memory allocator for GTT stolen memory */
 	struct drm_mm stolen;
-	/** Memory allocator for GTT */
-	struct drm_mm gtt_space;
 	/** List of all objects in gtt_space. Used to restore gtt
 	 * mappings on resume */
 	struct list_head bound_list;
@@ -815,37 +921,12 @@
 	/** Usable portion of the GTT for GEM */
 	unsigned long stolen_base; /* limited to low memory (32-bit) */
 
-	int gtt_mtrr;
-
 	/** PPGTT used for aliasing the PPGTT with the GTT */
 	struct i915_hw_ppgtt *aliasing_ppgtt;
 
 	struct shrinker inactive_shrinker;
 	bool shrinker_no_lock_stealing;
 
-	/**
-	 * List of objects currently involved in rendering.
-	 *
-	 * Includes buffers having the contents of their GPU caches
-	 * flushed, not necessarily primitives.  last_rendering_seqno
-	 * represents when the rendering involved will be completed.
-	 *
-	 * A reference is held on the buffer while on this list.
-	 */
-	struct list_head active_list;
-
-	/**
-	 * LRU list of objects which are not in the ringbuffer and
-	 * are ready to unbind, but are still in the GTT.
-	 *
-	 * last_rendering_seqno is 0 while an object is in this list.
-	 *
-	 * A reference is not held on the buffer while on this list,
-	 * as merely being GTT-bound shouldn't prevent its being
-	 * freed, and we'll pull it off the list in the free path.
-	 */
-	struct list_head inactive_list;
-
 	/** LRU list of objects with fence regs on them. */
 	struct list_head fence_list;
 
@@ -864,16 +945,6 @@
 	 */
 	bool interruptible;
 
-	/**
-	 * Flag if the X Server, and thus DRM, is not currently in
-	 * control of the device.
-	 *
-	 * This is set between LeaveVT and EnterVT.  It needs to be
-	 * replaced with a semaphore.  It also needs to be
-	 * transitioned away from for kernel modesetting.
-	 */
-	int suspended;
-
 	/** Bit 6 swizzling required for X tiling */
 	uint32_t bit_6_swizzle_x;
 	/** Bit 6 swizzling required for Y tiling */
@@ -883,6 +954,7 @@
 	struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT];
 
 	/* accounting, useful for userland debugging */
+	spinlock_t object_stat_lock;
 	size_t object_memory;
 	u32 object_count;
 };
@@ -896,6 +968,11 @@
 	loff_t pos;
 };
 
+struct i915_error_state_file_priv {
+	struct drm_device *dev;
+	struct drm_i915_error_state *error;
+};
+
 struct i915_gpu_error {
 	/* For hangcheck timer */
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
@@ -987,6 +1064,88 @@
 	struct child_device_config *child_dev;
 };
 
+enum intel_ddb_partitioning {
+	INTEL_DDB_PART_1_2,
+	INTEL_DDB_PART_5_6, /* IVB+ */
+};
+
+struct intel_wm_level {
+	bool enable;
+	uint32_t pri_val;
+	uint32_t spr_val;
+	uint32_t cur_val;
+	uint32_t fbc_val;
+};
+
+/*
+ * This struct tracks the state needed for the Package C8+ feature.
+ *
+ * Package states C8 and deeper are really deep PC states that can only be
+ * reached when all the devices on the system allow it, so even if the graphics
+ * device allows PC8+, it doesn't mean the system will actually get to these
+ * states.
+ *
+ * Our driver only allows PC8+ when all the outputs are disabled, the power well
+ * is disabled and the GPU is idle. When these conditions are met, we manually
+ * do the other conditions: disable the interrupts, clocks and switch LCPLL
+ * refclk to Fclk.
+ *
+ * When we really reach PC8 or deeper states (not just when we allow it) we lose
+ * the state of some registers, so when we come back from PC8+ we need to
+ * restore this state. We don't get into PC8+ if we're not in RC6, so we don't
+ * need to take care of the registers kept by RC6.
+ *
+ * The interrupt disabling is part of the requirements. We can only leave the
+ * PCH HPD interrupts enabled. If we're in PC8+ and we get another interrupt we
+ * can lock the machine.
+ *
+ * Ideally every piece of our code that needs PC8+ disabled would call
+ * hsw_disable_package_c8, which would increment disable_count and prevent the
+ * system from reaching PC8+. But we don't have a symmetric way to do this for
+ * everything, so we have the requirements_met and gpu_idle variables. When we
+ * switch requirements_met or gpu_idle to true we decrease disable_count, and
+ * increase it in the opposite case. The requirements_met variable is true when
+ * all the CRTCs, encoders and the power well are disabled. The gpu_idle
+ * variable is true when the GPU is idle.
+ *
+ * In addition to everything, we only actually enable PC8+ if disable_count
+ * stays at zero for at least some seconds. This is implemented with the
+ * enable_work variable. We do this so we don't enable/disable PC8 dozens of
+ * consecutive times when all screens are disabled and some background app
+ * queries the state of our connectors, or we have some application constantly
+ * waking up to use the GPU. Only after the enable_work function actually
+ * enables PC8+ the "enable" variable will become true, which means that it can
+ * be false even if disable_count is 0.
+ *
+ * The irqs_disabled variable becomes true exactly after we disable the IRQs and
+ * goes back to false exactly before we reenable the IRQs. We use this variable
+ * to check if someone is trying to enable/disable IRQs while they're supposed
+ * to be disabled. This shouldn't happen and we'll print some error messages in
+ * case it happens, but if it actually happens we'll also update the variables
+ * inside struct regsave so when we restore the IRQs they will contain the
+ * latest expected values.
+ *
+ * For more, read "Display Sequences for Package C8" on our documentation.
+ */
+struct i915_package_c8 {
+	bool requirements_met;
+	bool gpu_idle;
+	bool irqs_disabled;
+	/* Only true after the delayed work task actually enables it. */
+	bool enabled;
+	int disable_count;
+	struct mutex lock;
+	struct delayed_work enable_work;
+
+	struct {
+		uint32_t deimr;
+		uint32_t sdeimr;
+		uint32_t gtimr;
+		uint32_t gtier;
+		uint32_t gen6_pmimr;
+	} regsave;
+};
+
 typedef struct drm_i915_private {
 	struct drm_device *dev;
 	struct kmem_cache *slab;
@@ -997,14 +1156,7 @@
 
 	void __iomem *regs;
 
-	struct drm_i915_gt_funcs gt;
-	/** gt_fifo_count and the subsequent register write are synchronized
-	 * with dev->struct_mutex. */
-	unsigned gt_fifo_count;
-	/** forcewake_count is protected by gt_lock */
-	unsigned forcewake_count;
-	/** gt_lock is also taken in irq contexts. */
-	spinlock_t gt_lock;
+	struct intel_uncore uncore;
 
 	struct intel_gmbus gmbus[GMBUS_NUM_PORTS];
 
@@ -1041,6 +1193,7 @@
 	/** Cached value of IMR to avoid reads in updating the bitfield */
 	u32 irq_mask;
 	u32 gt_irq_mask;
+	u32 pm_irq_mask;
 
 	struct work_struct hotplug_work;
 	bool enable_hotplug_processing;
@@ -1058,12 +1211,7 @@
 
 	int num_plane;
 
-	unsigned long cfb_size;
-	unsigned int cfb_fb;
-	enum plane cfb_plane;
-	int cfb_y;
-	struct intel_fbc_work *fbc_work;
-
+	struct i915_fbc fbc;
 	struct intel_opregion opregion;
 	struct intel_vbt_data vbt;
 
@@ -1080,8 +1228,6 @@
 	} backlight;
 
 	/* LVDS info */
-	struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
-	struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
 	bool no_aux_handshake;
 
 	struct drm_i915_fence_reg fence_regs[I915_MAX_NUM_FENCES]; /* assume 965 */
@@ -1104,7 +1250,8 @@
 	enum modeset_restore modeset_restore;
 	struct mutex modeset_restore_lock;
 
-	struct i915_gtt gtt;
+	struct list_head vm_list; /* Global list of all address spaces */
+	struct i915_gtt gtt; /* VMA representing the global address space */
 
 	struct i915_gem_mm mm;
 
@@ -1131,6 +1278,9 @@
 
 	struct intel_l3_parity l3_parity;
 
+	/* Cannot be determined by PCIID. You must always read a register. */
+	size_t ellc_size;
+
 	/* gen6+ rps state */
 	struct intel_gen6_power_mgmt rps;
 
@@ -1141,10 +1291,7 @@
 	/* Haswell power well */
 	struct i915_power_well power_well;
 
-	enum no_fbc_reason no_fbc_reason;
-
-	struct drm_mm_node *compressed_fb;
-	struct drm_mm_node *compressed_llb;
+	enum no_psr_reason no_psr_reason;
 
 	struct i915_gpu_error gpu_error;
 
@@ -1169,11 +1316,34 @@
 
 	struct i915_suspend_saved_registers regfile;
 
+	struct {
+		/*
+		 * Raw watermark latency values:
+		 * in 0.1us units for WM0,
+		 * in 0.5us units for WM1+.
+		 */
+		/* primary */
+		uint16_t pri_latency[5];
+		/* sprite */
+		uint16_t spr_latency[5];
+		/* cursor */
+		uint16_t cur_latency[5];
+	} wm;
+
+	struct i915_package_c8 pc8;
+
 	/* Old dri1 support infrastructure, beware the dragons ya fools entering
 	 * here! */
 	struct i915_dri1_state dri1;
+	/* Old ums support infrastructure, same warning applies. */
+	struct i915_ums_state ums;
 } drm_i915_private_t;
 
+static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
+{
+	return dev->dev_private;
+}
+
 /* Iterate over initialised rings */
 #define for_each_ring(ring__, dev_priv__, i__) \
 	for ((i__) = 0; (i__) < I915_NUM_RINGS; (i__)++) \
@@ -1186,7 +1356,7 @@
 	HDMI_AUDIO_ON,			/* force turn on HDMI audio */
 };
 
-#define I915_GTT_RESERVED ((struct drm_mm_node *)0x1)
+#define I915_GTT_OFFSET_NONE ((u32)-1)
 
 struct drm_i915_gem_object_ops {
 	/* Interface between the GEM object and its backing storage.
@@ -1211,15 +1381,16 @@
 
 	const struct drm_i915_gem_object_ops *ops;
 
-	/** Current space allocated to this object in the GTT, if any. */
-	struct drm_mm_node *gtt_space;
+	/** List of VMAs backed by this object */
+	struct list_head vma_list;
+
 	/** Stolen memory for this object, instead of being backed by shmem. */
 	struct drm_mm_node *stolen;
 	struct list_head global_list;
 
-	/** This object's place on the active/inactive lists */
 	struct list_head ring_list;
-	struct list_head mm_list;
+	/** Used in execbuf to temporarily hold a ref */
+	struct list_head obj_exec_link;
 	/** This object's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
 
@@ -1286,6 +1457,7 @@
 	 */
 	unsigned int fault_mappable:1;
 	unsigned int pin_mappable:1;
+	unsigned int pin_display:1;
 
 	/*
 	 * Is the GPU currently using a fence to access this buffer,
@@ -1293,7 +1465,7 @@
 	unsigned int pending_fenced_gpu_access:1;
 	unsigned int fenced_gpu_access:1;
 
-	unsigned int cache_level:2;
+	unsigned int cache_level:3;
 
 	unsigned int has_aliasing_ppgtt_mapping:1;
 	unsigned int has_global_gtt_mapping:1;
@@ -1313,13 +1485,6 @@
 	unsigned long exec_handle;
 	struct drm_i915_gem_exec_object2 *exec_entry;
 
-	/**
-	 * Current offset of the object in GTT space.
-	 *
-	 * This is the same as gtt_space->start
-	 */
-	uint32_t gtt_offset;
-
 	struct intel_ring_buffer *ring;
 
 	/** Breadcrumb of last rendering to the buffer. */
@@ -1395,7 +1560,7 @@
 	struct i915_ctx_hang_stats hang_stats;
 };
 
-#define INTEL_INFO(dev)	(((struct drm_i915_private *) (dev)->dev_private)->info)
+#define INTEL_INFO(dev)	(to_i915(dev)->info)
 
 #define IS_I830(dev)		((dev)->pci_device == 0x3577)
 #define IS_845G(dev)		((dev)->pci_device == 0x2562)
@@ -1413,7 +1578,6 @@
 #define IS_PINEVIEW_M(dev)	((dev)->pci_device == 0xa011)
 #define IS_PINEVIEW(dev)	(INTEL_INFO(dev)->is_pineview)
 #define IS_G33(dev)		(INTEL_INFO(dev)->is_g33)
-#define IS_IRONLAKE_D(dev)	((dev)->pci_device == 0x0042)
 #define IS_IRONLAKE_M(dev)	((dev)->pci_device == 0x0046)
 #define IS_IVYBRIDGE(dev)	(INTEL_INFO(dev)->is_ivybridge)
 #define IS_IVB_GT1(dev)		((dev)->pci_device == 0x0156 || \
@@ -1425,6 +1589,8 @@
 #define IS_VALLEYVIEW(dev)	(INTEL_INFO(dev)->is_valleyview)
 #define IS_HASWELL(dev)	(INTEL_INFO(dev)->is_haswell)
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
+#define IS_HSW_EARLY_SDV(dev)	(IS_HASWELL(dev) && \
+				 ((dev)->pci_device & 0xFF00) == 0x0C00)
 #define IS_ULT(dev)		(IS_HASWELL(dev) && \
 				 ((dev)->pci_device & 0xFF00) == 0x0A00)
 
@@ -1445,6 +1611,7 @@
 #define HAS_BLT(dev)            (INTEL_INFO(dev)->has_blt_ring)
 #define HAS_VEBOX(dev)          (INTEL_INFO(dev)->has_vebox_ring)
 #define HAS_LLC(dev)            (INTEL_INFO(dev)->has_llc)
+#define HAS_WT(dev)            (IS_HASWELL(dev) && to_i915(dev)->ellc_size)
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
@@ -1467,8 +1634,6 @@
 #define SUPPORTS_EDP(dev)		(IS_IRONLAKE_M(dev))
 #define SUPPORTS_TV(dev)		(INTEL_INFO(dev)->supports_tv)
 #define I915_HAS_HOTPLUG(dev)		 (INTEL_INFO(dev)->has_hotplug)
-/* dsparb controlled by hw only */
-#define DSPARB_HWCONTROL(dev) (IS_G4X(dev) || IS_IRONLAKE(dev))
 
 #define HAS_FW_BLC(dev) (INTEL_INFO(dev)->gen > 2)
 #define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
@@ -1476,8 +1641,6 @@
 
 #define HAS_IPS(dev)		(IS_ULT(dev))
 
-#define HAS_PIPE_CONTROL(dev) (INTEL_INFO(dev)->gen >= 5)
-
 #define HAS_DDI(dev)		(INTEL_INFO(dev)->has_ddi)
 #define HAS_POWER_WELL(dev)	(IS_HASWELL(dev))
 #define HAS_FPGA_DBG_UNCLAIMED(dev)	(INTEL_INFO(dev)->has_fpga_dbg)
@@ -1489,7 +1652,7 @@
 #define INTEL_PCH_LPT_DEVICE_ID_TYPE		0x8c00
 #define INTEL_PCH_LPT_LP_DEVICE_ID_TYPE		0x9c00
 
-#define INTEL_PCH_TYPE(dev) (((struct drm_i915_private *)(dev)->dev_private)->pch_type)
+#define INTEL_PCH_TYPE(dev) (to_i915(dev)->pch_type)
 #define HAS_PCH_LPT(dev) (INTEL_PCH_TYPE(dev) == PCH_LPT)
 #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
 #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX)
@@ -1525,7 +1688,7 @@
 #define INTEL_RC6p_ENABLE			(1<<1)
 #define INTEL_RC6pp_ENABLE			(1<<2)
 
-extern struct drm_ioctl_desc i915_ioctls[];
+extern const struct drm_ioctl_desc i915_ioctls[];
 extern int i915_max_ioctl;
 extern unsigned int i915_fbpercrtc __always_unused;
 extern int i915_panel_ignore_lid __read_mostly;
@@ -1539,9 +1702,14 @@
 extern int i915_enable_fbc __read_mostly;
 extern bool i915_enable_hangcheck __read_mostly;
 extern int i915_enable_ppgtt __read_mostly;
+extern int i915_enable_psr __read_mostly;
 extern unsigned int i915_preliminary_hw_support __read_mostly;
 extern int i915_disable_power_well __read_mostly;
 extern int i915_enable_ips __read_mostly;
+extern bool i915_fastboot __read_mostly;
+extern int i915_enable_pc8 __read_mostly;
+extern int i915_pc8_timeout __read_mostly;
+extern bool i915_prefault_disable __read_mostly;
 
 extern int i915_suspend(struct drm_device *dev, pm_message_t state);
 extern int i915_resume(struct drm_device *dev);
@@ -1577,15 +1745,19 @@
 extern void intel_console_resume(struct work_struct *work);
 
 /* i915_irq.c */
-void i915_hangcheck_elapsed(unsigned long data);
+void i915_queue_hangcheck(struct drm_device *dev);
 void i915_handle_error(struct drm_device *dev, bool wedged);
 
 extern void intel_irq_init(struct drm_device *dev);
+extern void intel_pm_init(struct drm_device *dev);
 extern void intel_hpd_init(struct drm_device *dev);
-extern void intel_gt_init(struct drm_device *dev);
-extern void intel_gt_reset(struct drm_device *dev);
+extern void intel_pm_init(struct drm_device *dev);
 
-void i915_error_state_free(struct kref *error_ref);
+extern void intel_uncore_sanitize(struct drm_device *dev);
+extern void intel_uncore_early_sanitize(struct drm_device *dev);
+extern void intel_uncore_init(struct drm_device *dev);
+extern void intel_uncore_clear_errors(struct drm_device *dev);
+extern void intel_uncore_check_errors(struct drm_device *dev);
 
 void
 i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
@@ -1593,13 +1765,6 @@
 void
 i915_disable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
 
-#ifdef CONFIG_DEBUG_FS
-extern void i915_destroy_error_state(struct drm_device *dev);
-#else
-#define i915_destroy_error_state(x)
-#endif
-
-
 /* i915_gem.c */
 int i915_gem_init_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
@@ -1656,13 +1821,18 @@
 struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 						  size_t size);
 void i915_gem_free_object(struct drm_gem_object *obj);
+struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm);
+void i915_gem_vma_destroy(struct i915_vma *vma);
 
 int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm,
 				     uint32_t alignment,
 				     bool map_and_fenceable,
 				     bool nonblocking);
 void i915_gem_object_unpin(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_object_unbind(struct drm_i915_gem_object *obj);
+int __must_check i915_vma_unbind(struct i915_vma *vma);
+int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
 int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
 void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
 void i915_gem_lastclose(struct drm_device *dev);
@@ -1699,8 +1869,6 @@
 			 struct drm_mode_create_dumb *args);
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
-int i915_gem_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
-			  uint32_t handle);
 /**
  * Returns true if seq1 is later than seq2.
  */
@@ -1752,10 +1920,7 @@
 }
 
 void i915_gem_reset(struct drm_device *dev);
-void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
-int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
-					    uint32_t read_domains,
-					    uint32_t write_domain);
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
@@ -1782,6 +1947,7 @@
 i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 				     u32 alignment,
 				     struct intel_ring_buffer *pipelined);
+void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj);
 int i915_gem_attach_phys_object(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				int id,
@@ -1808,6 +1974,56 @@
 
 void i915_gem_restore_fences(struct drm_device *dev);
 
+unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
+				  struct i915_address_space *vm);
+bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o);
+bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
+			struct i915_address_space *vm);
+unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
+				struct i915_address_space *vm);
+struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm);
+struct i915_vma *
+i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
+				  struct i915_address_space *vm);
+/* Some GGTT VM helpers */
+#define obj_to_ggtt(obj) \
+	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
+static inline bool i915_is_ggtt(struct i915_address_space *vm)
+{
+	struct i915_address_space *ggtt =
+		&((struct drm_i915_private *)(vm)->dev->dev_private)->gtt.base;
+	return vm == ggtt;
+}
+
+static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
+{
+	return i915_gem_obj_bound(obj, obj_to_ggtt(obj));
+}
+
+static inline unsigned long
+i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *obj)
+{
+	return i915_gem_obj_offset(obj, obj_to_ggtt(obj));
+}
+
+static inline unsigned long
+i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
+{
+	return i915_gem_obj_size(obj, obj_to_ggtt(obj));
+}
+
+static inline int __must_check
+i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
+		      uint32_t alignment,
+		      bool map_and_fenceable,
+		      bool nonblocking)
+{
+	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
+				   map_and_fenceable, nonblocking);
+}
+#undef obj_to_ggtt
+
 /* i915_gem_context.c */
 void i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
@@ -1826,7 +2042,7 @@
 }
 
 struct i915_ctx_hang_stats * __must_check
-i915_gem_context_get_hang_stats(struct intel_ring_buffer *ring,
+i915_gem_context_get_hang_stats(struct drm_device *dev,
 				struct drm_file *file,
 				u32 id);
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
@@ -1860,7 +2076,9 @@
 
 
 /* i915_gem_evict.c */
-int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size,
+int __must_check i915_gem_evict_something(struct drm_device *dev,
+					  struct i915_address_space *vm,
+					  int min_size,
 					  unsigned alignment,
 					  unsigned cache_level,
 					  bool mappable,
@@ -1882,7 +2100,7 @@
 void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj);
 
 /* i915_gem_tiling.c */
-inline static bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
+static inline bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
 {
 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 
@@ -1895,23 +2113,36 @@
 void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj);
 
 /* i915_gem_debug.c */
-void i915_gem_dump_object(struct drm_i915_gem_object *obj, int len,
-			  const char *where, uint32_t mark);
 #if WATCH_LISTS
 int i915_verify_lists(struct drm_device *dev);
 #else
 #define i915_verify_lists(dev) 0
 #endif
-void i915_gem_object_check_coherency(struct drm_i915_gem_object *obj,
-				     int handle);
-void i915_gem_dump_object(struct drm_i915_gem_object *obj, int len,
-			  const char *where, uint32_t mark);
 
 /* i915_debugfs.c */
 int i915_debugfs_init(struct drm_minor *minor);
 void i915_debugfs_cleanup(struct drm_minor *minor);
+
+/* i915_gpu_error.c */
 __printf(2, 3)
 void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...);
+int i915_error_state_to_str(struct drm_i915_error_state_buf *estr,
+			    const struct i915_error_state_file_priv *error);
+int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb,
+			      size_t count, loff_t pos);
+static inline void i915_error_state_buf_release(
+	struct drm_i915_error_state_buf *eb)
+{
+	kfree(eb->buf);
+}
+void i915_capture_error_state(struct drm_device *dev);
+void i915_error_state_get(struct drm_device *dev,
+			  struct i915_error_state_file_priv *error_priv);
+void i915_error_state_put(struct i915_error_state_file_priv *error_priv);
+void i915_destroy_error_state(struct drm_device *dev);
+
+void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone);
+const char *i915_cache_level_str(int type);
 
 /* i915_suspend.c */
 extern int i915_save_state(struct drm_device *dev);
@@ -1991,7 +2222,6 @@
 			struct drm_file *file);
 
 /* overlay */
-#ifdef CONFIG_DEBUG_FS
 extern struct intel_overlay_error_state *intel_overlay_capture_error_state(struct drm_device *dev);
 extern void intel_overlay_print_error_state(struct drm_i915_error_state_buf *e,
 					    struct intel_overlay_error_state *error);
@@ -2000,7 +2230,6 @@
 extern void intel_display_print_error_state(struct drm_i915_error_state_buf *e,
 					    struct drm_device *dev,
 					    struct intel_display_error_state *error);
-#endif
 
 /* On SNB platform, before reading ring registers forcewake bit
  * must be set to prevent GT core from power down and stale values being
@@ -2008,7 +2237,6 @@
  */
 void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
 void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val);
 int sandybridge_pcode_write(struct drm_i915_private *dev_priv, u8 mbox, u32 val);
@@ -2027,39 +2255,37 @@
 int vlv_gpu_freq(int ddr_freq, int val);
 int vlv_freq_opcode(int ddr_freq, int val);
 
-#define __i915_read(x, y) \
-	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
-
-__i915_read(8, b)
-__i915_read(16, w)
-__i915_read(32, l)
-__i915_read(64, q)
+#define __i915_read(x) \
+	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg, bool trace);
+__i915_read(8)
+__i915_read(16)
+__i915_read(32)
+__i915_read(64)
 #undef __i915_read
 
-#define __i915_write(x, y) \
-	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
-
-__i915_write(8, b)
-__i915_write(16, w)
-__i915_write(32, l)
-__i915_write(64, q)
+#define __i915_write(x) \
+	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool trace);
+__i915_write(8)
+__i915_write(16)
+__i915_write(32)
+__i915_write(64)
 #undef __i915_write
 
-#define I915_READ8(reg)		i915_read8(dev_priv, (reg))
-#define I915_WRITE8(reg, val)	i915_write8(dev_priv, (reg), (val))
+#define I915_READ8(reg)		i915_read8(dev_priv, (reg), true)
+#define I915_WRITE8(reg, val)	i915_write8(dev_priv, (reg), (val), true)
 
-#define I915_READ16(reg)	i915_read16(dev_priv, (reg))
-#define I915_WRITE16(reg, val)	i915_write16(dev_priv, (reg), (val))
-#define I915_READ16_NOTRACE(reg)	readw(dev_priv->regs + (reg))
-#define I915_WRITE16_NOTRACE(reg, val)	writew(val, dev_priv->regs + (reg))
+#define I915_READ16(reg)	i915_read16(dev_priv, (reg), true)
+#define I915_WRITE16(reg, val)	i915_write16(dev_priv, (reg), (val), true)
+#define I915_READ16_NOTRACE(reg)	i915_read16(dev_priv, (reg), false)
+#define I915_WRITE16_NOTRACE(reg, val)	i915_write16(dev_priv, (reg), (val), false)
 
-#define I915_READ(reg)		i915_read32(dev_priv, (reg))
-#define I915_WRITE(reg, val)	i915_write32(dev_priv, (reg), (val))
-#define I915_READ_NOTRACE(reg)		readl(dev_priv->regs + (reg))
-#define I915_WRITE_NOTRACE(reg, val)	writel(val, dev_priv->regs + (reg))
+#define I915_READ(reg)		i915_read32(dev_priv, (reg), true)
+#define I915_WRITE(reg, val)	i915_write32(dev_priv, (reg), (val), true)
+#define I915_READ_NOTRACE(reg)		i915_read32(dev_priv, (reg), false)
+#define I915_WRITE_NOTRACE(reg, val)	i915_write32(dev_priv, (reg), (val), false)
 
-#define I915_WRITE64(reg, val)	i915_write64(dev_priv, (reg), (val))
-#define I915_READ64(reg)	i915_read64(dev_priv, (reg))
+#define I915_WRITE64(reg, val)	i915_write64(dev_priv, (reg), (val), true)
+#define I915_READ64(reg)	i915_read64(dev_priv, (reg), true)
 
 #define POSTING_READ(reg)	(void)I915_READ_NOTRACE(reg)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 97afd26..2d1cb10 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -26,6 +26,7 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_vma_manager.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
@@ -37,11 +38,14 @@
 #include <linux/dma-buf.h>
 
 static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
-static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
-						    unsigned alignment,
-						    bool map_and_fenceable,
-						    bool nonblocking);
+static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+						   bool force);
+static __must_check int
+i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
+			   struct i915_address_space *vm,
+			   unsigned alignment,
+			   bool map_and_fenceable,
+			   bool nonblocking);
 static int i915_gem_phys_pwrite(struct drm_device *dev,
 				struct drm_i915_gem_object *obj,
 				struct drm_i915_gem_pwrite *args,
@@ -59,6 +63,20 @@
 static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 
+static bool cpu_cache_is_coherent(struct drm_device *dev,
+				  enum i915_cache_level level)
+{
+	return HAS_LLC(dev) || level != I915_CACHE_NONE;
+}
+
+static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+		return true;
+
+	return obj->pin_display;
+}
+
 static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
 {
 	if (obj->tiling_mode)
@@ -75,15 +93,19 @@
 static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
 				  size_t size)
 {
+	spin_lock(&dev_priv->mm.object_stat_lock);
 	dev_priv->mm.object_count++;
 	dev_priv->mm.object_memory += size;
+	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
 static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
 				     size_t size)
 {
+	spin_lock(&dev_priv->mm.object_stat_lock);
 	dev_priv->mm.object_count--;
 	dev_priv->mm.object_memory -= size;
+	spin_unlock(&dev_priv->mm.object_stat_lock);
 }
 
 static int
@@ -135,7 +157,7 @@
 static inline bool
 i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
 {
-	return obj->gtt_space && !obj->active;
+	return i915_gem_obj_bound_any(obj) && !obj->active;
 }
 
 int
@@ -178,10 +200,10 @@
 	mutex_lock(&dev->struct_mutex);
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
 		if (obj->pin_count)
-			pinned += obj->gtt_space->size;
+			pinned += i915_gem_obj_ggtt_size(obj);
 	mutex_unlock(&dev->struct_mutex);
 
-	args->aper_size = dev_priv->gtt.total;
+	args->aper_size = dev_priv->gtt.base.total;
 	args->aper_available_size = args->aper_size - pinned;
 
 	return 0;
@@ -219,16 +241,10 @@
 		return -ENOMEM;
 
 	ret = drm_gem_handle_create(file, &obj->base, &handle);
-	if (ret) {
-		drm_gem_object_release(&obj->base);
-		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
-		i915_gem_object_free(obj);
-		return ret;
-	}
-
 	/* drop reference from allocate - handle holds it now */
-	drm_gem_object_unreference(&obj->base);
-	trace_i915_gem_object_create(obj);
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		return ret;
 
 	*handle_p = handle;
 	return 0;
@@ -246,13 +262,6 @@
 			       args->size, &args->handle);
 }
 
-int i915_gem_dumb_destroy(struct drm_file *file,
-			  struct drm_device *dev,
-			  uint32_t handle)
-{
-	return drm_gem_handle_delete(file, handle);
-}
-
 /**
  * Creates a new mm object and returns a handle to it.
  */
@@ -420,9 +429,8 @@
 		 * read domain and manually flush cachelines (if required). This
 		 * optimizes for the case when the gpu will dirty the data
 		 * anyway again before the next pread happens. */
-		if (obj->cache_level == I915_CACHE_NONE)
-			needs_clflush = 1;
-		if (obj->gtt_space) {
+		needs_clflush = !cpu_cache_is_coherent(dev, obj->cache_level);
+		if (i915_gem_obj_bound_any(obj)) {
 			ret = i915_gem_object_set_to_gtt_domain(obj, false);
 			if (ret)
 				return ret;
@@ -465,7 +473,7 @@
 
 		mutex_unlock(&dev->struct_mutex);
 
-		if (!prefaulted) {
+		if (likely(!i915_prefault_disable) && !prefaulted) {
 			ret = fault_in_multipages_writeable(user_data, remain);
 			/* Userspace is tricking us, but we've already clobbered
 			 * its pages with the prefault and promised to write the
@@ -594,7 +602,7 @@
 	char __user *user_data;
 	int page_offset, page_length, ret;
 
-	ret = i915_gem_object_pin(obj, 0, true, true);
+	ret = i915_gem_obj_ggtt_pin(obj, 0, true, true);
 	if (ret)
 		goto out;
 
@@ -609,7 +617,7 @@
 	user_data = to_user_ptr(args->data_ptr);
 	remain = args->size;
 
-	offset = obj->gtt_offset + args->offset;
+	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
 
 	while (remain > 0) {
 		/* Operation in this page
@@ -737,19 +745,18 @@
 		 * write domain and manually flush cachelines (if required). This
 		 * optimizes for the case when the gpu will use the data
 		 * right away and we therefore have to clflush anyway. */
-		if (obj->cache_level == I915_CACHE_NONE)
-			needs_clflush_after = 1;
-		if (obj->gtt_space) {
+		needs_clflush_after = cpu_write_needs_clflush(obj);
+		if (i915_gem_obj_bound_any(obj)) {
 			ret = i915_gem_object_set_to_gtt_domain(obj, true);
 			if (ret)
 				return ret;
 		}
 	}
-	/* Same trick applies for invalidate partially written cachelines before
-	 * writing.  */
-	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
-	    && obj->cache_level == I915_CACHE_NONE)
-		needs_clflush_before = 1;
+	/* Same trick applies to invalidate partially written cachelines read
+	 * before writing. */
+	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
+		needs_clflush_before =
+			!cpu_cache_is_coherent(dev, obj->cache_level);
 
 	ret = i915_gem_object_get_pages(obj);
 	if (ret)
@@ -828,8 +835,8 @@
 		 */
 		if (!needs_clflush_after &&
 		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
-			i915_gem_clflush_object(obj);
-			i915_gem_chipset_flush(dev);
+			if (i915_gem_clflush_object(obj, obj->pin_display))
+				i915_gem_chipset_flush(dev);
 		}
 	}
 
@@ -860,10 +867,12 @@
 		       args->size))
 		return -EFAULT;
 
-	ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
-					   args->size);
-	if (ret)
-		return -EFAULT;
+	if (likely(!i915_prefault_disable)) {
+		ret = fault_in_multipages_readable(to_user_ptr(args->data_ptr),
+						   args->size);
+		if (ret)
+			return -EFAULT;
+	}
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
@@ -904,9 +913,9 @@
 		goto out;
 	}
 
-	if (obj->cache_level == I915_CACHE_NONE &&
-	    obj->tiling_mode == I915_TILING_NONE &&
-	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
+	if (obj->tiling_mode == I915_TILING_NONE &&
+	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+	    cpu_write_needs_clflush(obj)) {
 		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
 		/* Note that the gtt paths might fail with non-page-backed user
 		 * pointers (e.g. gtt mappings when moving data between
@@ -990,6 +999,8 @@
 	bool wait_forever = true;
 	int ret;
 
+	WARN(dev_priv->pc8.irqs_disabled, "IRQs disabled\n");
+
 	if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
 		return 0;
 
@@ -1255,8 +1266,8 @@
 	}
 
 	/* Pinned buffers may be scanout, so flush the cache */
-	if (obj->pin_count)
-		i915_gem_object_flush_cpu_write_domain(obj);
+	if (obj->pin_display)
+		i915_gem_object_flush_cpu_write_domain(obj, true);
 
 	drm_gem_object_unreference(&obj->base);
 unlock:
@@ -1346,7 +1357,7 @@
 	}
 
 	/* Now bind it into the GTT if needed */
-	ret = i915_gem_object_pin(obj, 0, true, false);
+	ret = i915_gem_obj_ggtt_pin(obj,  0, true, false);
 	if (ret)
 		goto unlock;
 
@@ -1360,8 +1371,9 @@
 
 	obj->fault_mappable = true;
 
-	pfn = ((dev_priv->gtt.mappable_base + obj->gtt_offset) >> PAGE_SHIFT) +
-		page_offset;
+	pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
+	pfn >>= PAGE_SHIFT;
+	pfn += page_offset;
 
 	/* Finally, remap it using the new GTT offset */
 	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
@@ -1425,11 +1437,7 @@
 	if (!obj->fault_mappable)
 		return;
 
-	if (obj->base.dev->dev_mapping)
-		unmap_mapping_range(obj->base.dev->dev_mapping,
-				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
-				    obj->base.size, 1);
-
+	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
 	obj->fault_mappable = false;
 }
 
@@ -1485,7 +1493,7 @@
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
-	if (obj->base.map_list.map)
+	if (drm_vma_node_has_offset(&obj->base.vma_node))
 		return 0;
 
 	dev_priv->mm.shrinker_no_lock_stealing = true;
@@ -1516,9 +1524,6 @@
 
 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 {
-	if (!obj->base.map_list.map)
-		return;
-
 	drm_gem_free_mmap_offset(&obj->base);
 }
 
@@ -1557,7 +1562,7 @@
 	if (ret)
 		goto out;
 
-	*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
+	*offset = drm_vma_node_offset_addr(&obj->base.vma_node);
 
 out:
 	drm_gem_object_unreference(&obj->base);
@@ -1632,7 +1637,7 @@
 		 * hope for the best.
 		 */
 		WARN_ON(ret != -EIO);
-		i915_gem_clflush_object(obj);
+		i915_gem_clflush_object(obj, true);
 		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	}
 
@@ -1667,11 +1672,11 @@
 	if (obj->pages == NULL)
 		return 0;
 
-	BUG_ON(obj->gtt_space);
-
 	if (obj->pages_pin_count)
 		return -EBUSY;
 
+	BUG_ON(i915_gem_obj_bound_any(obj));
+
 	/* ->put_pages might need to allocate memory for the bit17 swizzle
 	 * array, hence protect them from being reaped by removing them from gtt
 	 * lists early. */
@@ -1704,12 +1709,18 @@
 		}
 	}
 
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list,
-				 mm_list) {
-		if ((i915_gem_object_is_purgeable(obj) || !purgeable_only) &&
-		    i915_gem_object_unbind(obj) == 0 &&
-		    i915_gem_object_put_pages(obj) == 0) {
+	list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list,
+				 global_list) {
+		struct i915_vma *vma, *v;
+
+		if (!i915_gem_object_is_purgeable(obj) && purgeable_only)
+			continue;
+
+		list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link)
+			if (i915_vma_unbind(vma))
+				break;
+
+		if (!i915_gem_object_put_pages(obj)) {
 			count += obj->base.size >> PAGE_SHIFT;
 			if (count >= target)
 				return count;
@@ -1892,8 +1903,6 @@
 		obj->active = 1;
 	}
 
-	/* Move from whatever list we were on to the tail of execution. */
-	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
 	list_move_tail(&obj->ring_list, &ring->active_list);
 
 	obj->last_read_seqno = seqno;
@@ -1915,13 +1924,14 @@
 static void
 i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
+	struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
 
 	BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS);
 	BUG_ON(!obj->active);
 
-	list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_move_tail(&vma->mm_list, &ggtt_vm->inactive_list);
 
 	list_del_init(&obj->ring_list);
 	obj->ring = NULL;
@@ -2085,11 +2095,9 @@
 	trace_i915_gem_request_add(ring, request->seqno);
 	ring->outstanding_lazy_request = 0;
 
-	if (!dev_priv->mm.suspended) {
-		if (i915_enable_hangcheck) {
-			mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-				  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
-		}
+	if (!dev_priv->ums.mm_suspended) {
+		i915_queue_hangcheck(ring->dev);
+
 		if (was_empty) {
 			queue_delayed_work(dev_priv->wq,
 					   &dev_priv->mm.retire_work,
@@ -2119,10 +2127,11 @@
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj)
+static bool i915_head_inside_object(u32 acthd, struct drm_i915_gem_object *obj,
+				    struct i915_address_space *vm)
 {
-	if (acthd >= obj->gtt_offset &&
-	    acthd < obj->gtt_offset + obj->base.size)
+	if (acthd >= i915_gem_obj_offset(obj, vm) &&
+	    acthd < i915_gem_obj_offset(obj, vm) + obj->base.size)
 		return true;
 
 	return false;
@@ -2145,6 +2154,17 @@
 	return false;
 }
 
+static struct i915_address_space *
+request_to_vm(struct drm_i915_gem_request *request)
+{
+	struct drm_i915_private *dev_priv = request->ring->dev->dev_private;
+	struct i915_address_space *vm;
+
+	vm = &dev_priv->gtt.base;
+
+	return vm;
+}
+
 static bool i915_request_guilty(struct drm_i915_gem_request *request,
 				const u32 acthd, bool *inside)
 {
@@ -2152,9 +2172,9 @@
 	 * pointing inside the ring, matches the batch_obj address range.
 	 * However this is extremely unlikely.
 	 */
-
 	if (request->batch_obj) {
-		if (i915_head_inside_object(acthd, request->batch_obj)) {
+		if (i915_head_inside_object(acthd, request->batch_obj,
+					    request_to_vm(request))) {
 			*inside = true;
 			return true;
 		}
@@ -2174,17 +2194,21 @@
 {
 	struct i915_ctx_hang_stats *hs = NULL;
 	bool inside, guilty;
+	unsigned long offset = 0;
 
 	/* Innocent until proven guilty */
 	guilty = false;
 
-	if (ring->hangcheck.action != wait &&
+	if (request->batch_obj)
+		offset = i915_gem_obj_offset(request->batch_obj,
+					     request_to_vm(request));
+
+	if (ring->hangcheck.action != HANGCHECK_WAIT &&
 	    i915_request_guilty(request, acthd, &inside)) {
-		DRM_ERROR("%s hung %s bo (0x%x ctx %d) at 0x%x\n",
+		DRM_ERROR("%s hung %s bo (0x%lx ctx %d) at 0x%x\n",
 			  ring->name,
 			  inside ? "inside" : "flushing",
-			  request->batch_obj ?
-			  request->batch_obj->gtt_offset : 0,
+			  offset,
 			  request->ctx ? request->ctx->id : 0,
 			  acthd);
 
@@ -2258,30 +2282,29 @@
 
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
-		i915_gem_write_fence(dev, i, reg->obj);
+
+		/*
+		 * Commit delayed tiling changes if we have an object still
+		 * attached to the fence, otherwise just clear the fence.
+		 */
+		if (reg->obj) {
+			i915_gem_object_update_fence(reg->obj, reg,
+						     reg->obj->tiling_mode);
+		} else {
+			i915_gem_write_fence(dev, i, NULL);
+		}
 	}
 }
 
 void i915_gem_reset(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj;
 	struct intel_ring_buffer *ring;
 	int i;
 
 	for_each_ring(ring, dev_priv, i)
 		i915_gem_reset_ring_lists(dev_priv, ring);
 
-	/* Move everything out of the GPU domains to ensure we do any
-	 * necessary invalidation upon reuse.
-	 */
-	list_for_each_entry(obj,
-			    &dev_priv->mm.inactive_list,
-			    mm_list)
-	{
-		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
-	}
-
 	i915_gem_restore_fences(dev);
 }
 
@@ -2390,7 +2413,7 @@
 		idle &= list_empty(&ring->request_list);
 	}
 
-	if (!dev_priv->mm.suspended && !idle)
+	if (!dev_priv->ums.mm_suspended && !idle)
 		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
 				   round_jiffies_up_relative(HZ));
 	if (idle)
@@ -2576,18 +2599,18 @@
 					    old_write_domain);
 }
 
-/**
- * Unbinds an object from the GTT aperture.
- */
-int
-i915_gem_object_unbind(struct drm_i915_gem_object *obj)
+int i915_vma_unbind(struct i915_vma *vma)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
 	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
 	int ret;
 
-	if (obj->gtt_space == NULL)
+	if (list_empty(&vma->vma_link))
 		return 0;
 
+	if (!drm_mm_node_allocated(&vma->node))
+		goto destroy;
+
 	if (obj->pin_count)
 		return -EBUSY;
 
@@ -2608,7 +2631,7 @@
 	if (ret)
 		return ret;
 
-	trace_i915_gem_object_unbind(obj);
+	trace_i915_vma_unbind(vma);
 
 	if (obj->has_global_gtt_mapping)
 		i915_gem_gtt_unbind_object(obj);
@@ -2619,18 +2642,46 @@
 	i915_gem_gtt_finish_object(obj);
 	i915_gem_object_unpin_pages(obj);
 
-	list_del(&obj->mm_list);
-	list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+	list_del(&vma->mm_list);
 	/* Avoid an unnecessary call to unbind on rebind. */
-	obj->map_and_fenceable = true;
+	if (i915_is_ggtt(vma->vm))
+		obj->map_and_fenceable = true;
 
-	drm_mm_put_block(obj->gtt_space);
-	obj->gtt_space = NULL;
-	obj->gtt_offset = 0;
+	drm_mm_remove_node(&vma->node);
+
+destroy:
+	i915_gem_vma_destroy(vma);
+
+	/* Since the unbound list is global, only move to that list if
+	 * no more VMAs exist.
+	 * NB: Until we have real VMAs there will only ever be one */
+	WARN_ON(!list_empty(&obj->vma_list));
+	if (list_empty(&obj->vma_list))
+		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
 
 	return 0;
 }
 
+/**
+ * Unbinds an object from the global GTT aperture.
+ */
+int
+i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	struct i915_address_space *ggtt = &dev_priv->gtt.base;
+
+	if (!i915_gem_obj_ggtt_bound(obj))
+		return 0;
+
+	if (obj->pin_count)
+		return -EBUSY;
+
+	BUG_ON(obj->pages == NULL);
+
+	return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt));
+}
+
 int i915_gpu_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2681,12 +2732,12 @@
 	POSTING_READ(fence_reg);
 
 	if (obj) {
-		u32 size = obj->gtt_space->size;
+		u32 size = i915_gem_obj_ggtt_size(obj);
 		uint64_t val;
 
-		val = (uint64_t)((obj->gtt_offset + size - 4096) &
+		val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) &
 				 0xfffff000) << 32;
-		val |= obj->gtt_offset & 0xfffff000;
+		val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000;
 		val |= (uint64_t)((obj->stride / 128) - 1) << fence_pitch_shift;
 		if (obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I965_FENCE_TILING_Y_SHIFT;
@@ -2710,15 +2761,15 @@
 	u32 val;
 
 	if (obj) {
-		u32 size = obj->gtt_space->size;
+		u32 size = i915_gem_obj_ggtt_size(obj);
 		int pitch_val;
 		int tile_width;
 
-		WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
+		WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) ||
 		     (size & -size) != size ||
-		     (obj->gtt_offset & (size - 1)),
-		     "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
-		     obj->gtt_offset, obj->map_and_fenceable, size);
+		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
+		     "object 0x%08lx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
+		     i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size);
 
 		if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
 			tile_width = 128;
@@ -2729,7 +2780,7 @@
 		pitch_val = obj->stride / tile_width;
 		pitch_val = ffs(pitch_val) - 1;
 
-		val = obj->gtt_offset;
+		val = i915_gem_obj_ggtt_offset(obj);
 		if (obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
 		val |= I915_FENCE_SIZE_BITS(size);
@@ -2754,19 +2805,19 @@
 	uint32_t val;
 
 	if (obj) {
-		u32 size = obj->gtt_space->size;
+		u32 size = i915_gem_obj_ggtt_size(obj);
 		uint32_t pitch_val;
 
-		WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
+		WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) ||
 		     (size & -size) != size ||
-		     (obj->gtt_offset & (size - 1)),
-		     "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
-		     obj->gtt_offset, size);
+		     (i915_gem_obj_ggtt_offset(obj) & (size - 1)),
+		     "object 0x%08lx not 512K or pot-size 0x%08x aligned\n",
+		     i915_gem_obj_ggtt_offset(obj), size);
 
 		pitch_val = obj->stride / 128;
 		pitch_val = ffs(pitch_val) - 1;
 
-		val = obj->gtt_offset;
+		val = i915_gem_obj_ggtt_offset(obj);
 		if (obj->tiling_mode == I915_TILING_Y)
 			val |= 1 << I830_FENCE_TILING_Y_SHIFT;
 		val |= I830_FENCE_SIZE_BITS(size);
@@ -2795,6 +2846,10 @@
 	if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
 		mb();
 
+	WARN(obj && (!obj->stride || !obj->tiling_mode),
+	     "bogus fence setup with stride: 0x%x, tiling mode: %i\n",
+	     obj->stride, obj->tiling_mode);
+
 	switch (INTEL_INFO(dev)->gen) {
 	case 7:
 	case 6:
@@ -2836,6 +2891,7 @@
 		fence->obj = NULL;
 		list_del_init(&fence->lru_list);
 	}
+	obj->fence_dirty = false;
 }
 
 static int
@@ -2965,7 +3021,6 @@
 		return 0;
 
 	i915_gem_object_update_fence(obj, reg, enable);
-	obj->fence_dirty = false;
 
 	return 0;
 }
@@ -2983,7 +3038,7 @@
 	if (HAS_LLC(dev))
 		return true;
 
-	if (gtt_space == NULL)
+	if (!drm_mm_node_allocated(gtt_space))
 		return true;
 
 	if (list_empty(&gtt_space->node_list))
@@ -3016,8 +3071,8 @@
 
 		if (obj->cache_level != obj->gtt_space->color) {
 			printk(KERN_ERR "object reserved space [%08lx, %08lx] with wrong color, cache_level=%x, color=%lx\n",
-			       obj->gtt_space->start,
-			       obj->gtt_space->start + obj->gtt_space->size,
+			       i915_gem_obj_ggtt_offset(obj),
+			       i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
 			       obj->cache_level,
 			       obj->gtt_space->color);
 			err++;
@@ -3028,8 +3083,8 @@
 					      obj->gtt_space,
 					      obj->cache_level)) {
 			printk(KERN_ERR "invalid GTT space found at [%08lx, %08lx] - color=%x\n",
-			       obj->gtt_space->start,
-			       obj->gtt_space->start + obj->gtt_space->size,
+			       i915_gem_obj_ggtt_offset(obj),
+			       i915_gem_obj_ggtt_offset(obj) + i915_gem_obj_ggtt_size(obj),
 			       obj->cache_level);
 			err++;
 			continue;
@@ -3044,18 +3099,18 @@
  * Finds free space in the GTT aperture and binds the object there.
  */
 static int
-i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
-			    unsigned alignment,
-			    bool map_and_fenceable,
-			    bool nonblocking)
+i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
+			   struct i915_address_space *vm,
+			   unsigned alignment,
+			   bool map_and_fenceable,
+			   bool nonblocking)
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_mm_node *node;
 	u32 size, fence_size, fence_alignment, unfenced_alignment;
-	bool mappable, fenceable;
-	size_t gtt_max = map_and_fenceable ?
-		dev_priv->gtt.mappable_end : dev_priv->gtt.total;
+	size_t gtt_max =
+		map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
+	struct i915_vma *vma;
 	int ret;
 
 	fence_size = i915_gem_get_gtt_size(dev,
@@ -3096,77 +3151,89 @@
 
 	i915_gem_object_pin_pages(obj);
 
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
-	if (node == NULL) {
-		i915_gem_object_unpin_pages(obj);
-		return -ENOMEM;
+	BUG_ON(!i915_is_ggtt(vm));
+
+	vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_unpin;
 	}
 
+	/* For now we only ever use 1 vma per object */
+	WARN_ON(!list_is_singular(&obj->vma_list));
+
 search_free:
-	ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node,
+	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
 						  size, alignment,
-						  obj->cache_level, 0, gtt_max);
+						  obj->cache_level, 0, gtt_max,
+						  DRM_MM_SEARCH_DEFAULT);
 	if (ret) {
-		ret = i915_gem_evict_something(dev, size, alignment,
+		ret = i915_gem_evict_something(dev, vm, size, alignment,
 					       obj->cache_level,
 					       map_and_fenceable,
 					       nonblocking);
 		if (ret == 0)
 			goto search_free;
 
-		i915_gem_object_unpin_pages(obj);
-		kfree(node);
-		return ret;
+		goto err_free_vma;
 	}
-	if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) {
-		i915_gem_object_unpin_pages(obj);
-		drm_mm_put_block(node);
-		return -EINVAL;
+	if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
+					      obj->cache_level))) {
+		ret = -EINVAL;
+		goto err_remove_node;
 	}
 
 	ret = i915_gem_gtt_prepare_object(obj);
-	if (ret) {
-		i915_gem_object_unpin_pages(obj);
-		drm_mm_put_block(node);
-		return ret;
-	}
+	if (ret)
+		goto err_remove_node;
 
 	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_add_tail(&vma->mm_list, &vm->inactive_list);
 
-	obj->gtt_space = node;
-	obj->gtt_offset = node->start;
+	if (i915_is_ggtt(vm)) {
+		bool mappable, fenceable;
 
-	fenceable =
-		node->size == fence_size &&
-		(node->start & (fence_alignment - 1)) == 0;
+		fenceable = (vma->node.size == fence_size &&
+			     (vma->node.start & (fence_alignment - 1)) == 0);
 
-	mappable =
-		obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end;
+		mappable = (vma->node.start + obj->base.size <=
+			    dev_priv->gtt.mappable_end);
 
-	obj->map_and_fenceable = mappable && fenceable;
+		obj->map_and_fenceable = mappable && fenceable;
+	}
 
-	trace_i915_gem_object_bind(obj, map_and_fenceable);
+	WARN_ON(map_and_fenceable && !obj->map_and_fenceable);
+
+	trace_i915_vma_bind(vma, map_and_fenceable);
 	i915_gem_verify_gtt(dev);
 	return 0;
+
+err_remove_node:
+	drm_mm_remove_node(&vma->node);
+err_free_vma:
+	i915_gem_vma_destroy(vma);
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
+	return ret;
 }
 
-void
-i915_gem_clflush_object(struct drm_i915_gem_object *obj)
+bool
+i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+			bool force)
 {
 	/* If we don't have a page list set up, then we're not pinned
 	 * to GPU, and we can ignore the cache flush because it'll happen
 	 * again at bind time.
 	 */
 	if (obj->pages == NULL)
-		return;
+		return false;
 
 	/*
 	 * Stolen memory is always coherent with the GPU as it is explicitly
 	 * marked as wc by the system, or the system is cache-coherent.
 	 */
 	if (obj->stolen)
-		return;
+		return false;
 
 	/* If the GPU is snooping the contents of the CPU cache,
 	 * we do not need to manually clear the CPU cache lines.  However,
@@ -3176,12 +3243,13 @@
 	 * snooping behaviour occurs naturally as the result of our domain
 	 * tracking.
 	 */
-	if (obj->cache_level != I915_CACHE_NONE)
-		return;
+	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
+		return false;
 
 	trace_i915_gem_object_clflush(obj);
-
 	drm_clflush_sg(obj->pages);
+
+	return true;
 }
 
 /** Flushes the GTT write domain for the object if it's dirty. */
@@ -3213,15 +3281,17 @@
 
 /** Flushes the CPU write domain for the object if it's dirty. */
 static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
+i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
+				       bool force)
 {
 	uint32_t old_write_domain;
 
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
 		return;
 
-	i915_gem_clflush_object(obj);
-	i915_gem_chipset_flush(obj->base.dev);
+	if (i915_gem_clflush_object(obj, force))
+		i915_gem_chipset_flush(obj->base.dev);
+
 	old_write_domain = obj->base.write_domain;
 	obj->base.write_domain = 0;
 
@@ -3244,7 +3314,7 @@
 	int ret;
 
 	/* Not valid to be called on unbound objects. */
-	if (obj->gtt_space == NULL)
+	if (!i915_gem_obj_bound_any(obj))
 		return -EINVAL;
 
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
@@ -3254,7 +3324,7 @@
 	if (ret)
 		return ret;
 
-	i915_gem_object_flush_cpu_write_domain(obj);
+	i915_gem_object_flush_cpu_write_domain(obj, false);
 
 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3282,8 +3352,14 @@
 					    old_write_domain);
 
 	/* And bump the LRU for this access */
-	if (i915_gem_object_is_inactive(obj))
-		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	if (i915_gem_object_is_inactive(obj)) {
+		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
+							   &dev_priv->gtt.base);
+		if (vma)
+			list_move_tail(&vma->mm_list,
+				       &dev_priv->gtt.base.inactive_list);
+
+	}
 
 	return 0;
 }
@@ -3293,6 +3369,7 @@
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_vma *vma;
 	int ret;
 
 	if (obj->cache_level == cache_level)
@@ -3303,13 +3380,17 @@
 		return -EBUSY;
 	}
 
-	if (!i915_gem_valid_gtt_space(dev, obj->gtt_space, cache_level)) {
-		ret = i915_gem_object_unbind(obj);
-		if (ret)
-			return ret;
+	list_for_each_entry(vma, &obj->vma_list, vma_link) {
+		if (!i915_gem_valid_gtt_space(dev, &vma->node, cache_level)) {
+			ret = i915_vma_unbind(vma);
+			if (ret)
+				return ret;
+
+			break;
+		}
 	}
 
-	if (obj->gtt_space) {
+	if (i915_gem_obj_bound_any(obj)) {
 		ret = i915_gem_object_finish_gpu(obj);
 		if (ret)
 			return ret;
@@ -3331,11 +3412,13 @@
 		if (obj->has_aliasing_ppgtt_mapping)
 			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
 					       obj, cache_level);
-
-		obj->gtt_space->color = cache_level;
 	}
 
-	if (cache_level == I915_CACHE_NONE) {
+	list_for_each_entry(vma, &obj->vma_list, vma_link)
+		vma->node.color = cache_level;
+	obj->cache_level = cache_level;
+
+	if (cpu_write_needs_clflush(obj)) {
 		u32 old_read_domains, old_write_domain;
 
 		/* If we're coming from LLC cached, then we haven't
@@ -3345,7 +3428,6 @@
 		 * Just set it to the CPU cache for now.
 		 */
 		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
-		WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);
 
 		old_read_domains = obj->base.read_domains;
 		old_write_domain = obj->base.write_domain;
@@ -3358,7 +3440,6 @@
 						    old_write_domain);
 	}
 
-	obj->cache_level = cache_level;
 	i915_gem_verify_gtt(dev);
 	return 0;
 }
@@ -3380,7 +3461,20 @@
 		goto unlock;
 	}
 
-	args->caching = obj->cache_level != I915_CACHE_NONE;
+	switch (obj->cache_level) {
+	case I915_CACHE_LLC:
+	case I915_CACHE_L3_LLC:
+		args->caching = I915_CACHING_CACHED;
+		break;
+
+	case I915_CACHE_WT:
+		args->caching = I915_CACHING_DISPLAY;
+		break;
+
+	default:
+		args->caching = I915_CACHING_NONE;
+		break;
+	}
 
 	drm_gem_object_unreference(&obj->base);
 unlock:
@@ -3403,6 +3497,9 @@
 	case I915_CACHING_CACHED:
 		level = I915_CACHE_LLC;
 		break;
+	case I915_CACHING_DISPLAY:
+		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -3425,6 +3522,22 @@
 	return ret;
 }
 
+static bool is_pin_display(struct drm_i915_gem_object *obj)
+{
+	/* There are 3 sources that pin objects:
+	 *   1. The display engine (scanouts, sprites, cursors);
+	 *   2. Reservations for execbuffer;
+	 *   3. The user.
+	 *
+	 * We can ignore reservations as we hold the struct_mutex and
+	 * are only called outside of the reservation path.  The user
+	 * can only increment pin_count once, and so if after
+	 * subtracting the potential reference by the user, any pin_count
+	 * remains, it must be due to another use by the display engine.
+	 */
+	return obj->pin_count - !!obj->user_pin_count;
+}
+
 /*
  * Prepare buffer for display plane (scanout, cursors, etc).
  * Can be called from an uninterruptible phase (modesetting) and allows
@@ -3444,6 +3557,11 @@
 			return ret;
 	}
 
+	/* Mark the pin_display early so that we account for the
+	 * display coherency whilst setting up the cache domains.
+	 */
+	obj->pin_display = true;
+
 	/* The display engine is not coherent with the LLC cache on gen6.  As
 	 * a result, we make sure that the pinning that is about to occur is
 	 * done with uncached PTEs. This is lowest common denominator for all
@@ -3453,19 +3571,20 @@
 	 * of uncaching, which would allow us to flush all the LLC-cached data
 	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
 	 */
-	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
+	ret = i915_gem_object_set_cache_level(obj,
+					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
 	if (ret)
-		return ret;
+		goto err_unpin_display;
 
 	/* As the user may map the buffer once pinned in the display plane
 	 * (e.g. libkms for the bootup splash), we have to ensure that we
 	 * always use map_and_fenceable for all scanout buffers.
 	 */
-	ret = i915_gem_object_pin(obj, alignment, true, false);
+	ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false);
 	if (ret)
-		return ret;
+		goto err_unpin_display;
 
-	i915_gem_object_flush_cpu_write_domain(obj);
+	i915_gem_object_flush_cpu_write_domain(obj, true);
 
 	old_write_domain = obj->base.write_domain;
 	old_read_domains = obj->base.read_domains;
@@ -3481,6 +3600,17 @@
 					    old_write_domain);
 
 	return 0;
+
+err_unpin_display:
+	obj->pin_display = is_pin_display(obj);
+	return ret;
+}
+
+void
+i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj)
+{
+	i915_gem_object_unpin(obj);
+	obj->pin_display = is_pin_display(obj);
 }
 
 int
@@ -3526,7 +3656,7 @@
 
 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
-		i915_gem_clflush_object(obj);
+		i915_gem_clflush_object(obj, false);
 
 		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
 	}
@@ -3604,37 +3734,44 @@
 
 int
 i915_gem_object_pin(struct drm_i915_gem_object *obj,
+		    struct i915_address_space *vm,
 		    uint32_t alignment,
 		    bool map_and_fenceable,
 		    bool nonblocking)
 {
+	struct i915_vma *vma;
 	int ret;
 
 	if (WARN_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
 		return -EBUSY;
 
-	if (obj->gtt_space != NULL) {
-		if ((alignment && obj->gtt_offset & (alignment - 1)) ||
+	WARN_ON(map_and_fenceable && !i915_is_ggtt(vm));
+
+	vma = i915_gem_obj_to_vma(obj, vm);
+
+	if (vma) {
+		if ((alignment &&
+		     vma->node.start & (alignment - 1)) ||
 		    (map_and_fenceable && !obj->map_and_fenceable)) {
 			WARN(obj->pin_count,
 			     "bo is already pinned with incorrect alignment:"
-			     " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
+			     " offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
 			     " obj->map_and_fenceable=%d\n",
-			     obj->gtt_offset, alignment,
+			     i915_gem_obj_offset(obj, vm), alignment,
 			     map_and_fenceable,
 			     obj->map_and_fenceable);
-			ret = i915_gem_object_unbind(obj);
+			ret = i915_vma_unbind(vma);
 			if (ret)
 				return ret;
 		}
 	}
 
-	if (obj->gtt_space == NULL) {
+	if (!i915_gem_obj_bound(obj, vm)) {
 		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
 
-		ret = i915_gem_object_bind_to_gtt(obj, alignment,
-						  map_and_fenceable,
-						  nonblocking);
+		ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
+						 map_and_fenceable,
+						 nonblocking);
 		if (ret)
 			return ret;
 
@@ -3655,7 +3792,7 @@
 i915_gem_object_unpin(struct drm_i915_gem_object *obj)
 {
 	BUG_ON(obj->pin_count == 0);
-	BUG_ON(obj->gtt_space == NULL);
+	BUG_ON(!i915_gem_obj_bound_any(obj));
 
 	if (--obj->pin_count == 0)
 		obj->pin_mappable = false;
@@ -3693,7 +3830,7 @@
 	}
 
 	if (obj->user_pin_count == 0) {
-		ret = i915_gem_object_pin(obj, args->alignment, true, false);
+		ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false);
 		if (ret)
 			goto out;
 	}
@@ -3701,11 +3838,7 @@
 	obj->user_pin_count++;
 	obj->pin_filp = file;
 
-	/* XXX - flush the CPU caches for pinned objects
-	 * as the X server doesn't manage domains yet
-	 */
-	i915_gem_object_flush_cpu_write_domain(obj);
-	args->offset = obj->gtt_offset;
+	args->offset = i915_gem_obj_ggtt_offset(obj);
 out:
 	drm_gem_object_unreference(&obj->base);
 unlock:
@@ -3844,10 +3977,11 @@
 void i915_gem_object_init(struct drm_i915_gem_object *obj,
 			  const struct drm_i915_gem_object_ops *ops)
 {
-	INIT_LIST_HEAD(&obj->mm_list);
 	INIT_LIST_HEAD(&obj->global_list);
 	INIT_LIST_HEAD(&obj->ring_list);
 	INIT_LIST_HEAD(&obj->exec_list);
+	INIT_LIST_HEAD(&obj->obj_exec_link);
+	INIT_LIST_HEAD(&obj->vma_list);
 
 	obj->ops = ops;
 
@@ -3912,6 +4046,8 @@
 	} else
 		obj->cache_level = I915_CACHE_NONE;
 
+	trace_i915_gem_object_create(obj);
+
 	return obj;
 }
 
@@ -3927,6 +4063,7 @@
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct i915_vma *vma, *next;
 
 	trace_i915_gem_object_destroy(obj);
 
@@ -3934,15 +4071,21 @@
 		i915_gem_detach_phys_object(dev, obj);
 
 	obj->pin_count = 0;
-	if (WARN_ON(i915_gem_object_unbind(obj) == -ERESTARTSYS)) {
-		bool was_interruptible;
+	/* NB: 0 or 1 elements */
+	WARN_ON(!list_empty(&obj->vma_list) &&
+		!list_is_singular(&obj->vma_list));
+	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
+		int ret = i915_vma_unbind(vma);
+		if (WARN_ON(ret == -ERESTARTSYS)) {
+			bool was_interruptible;
 
-		was_interruptible = dev_priv->mm.interruptible;
-		dev_priv->mm.interruptible = false;
+			was_interruptible = dev_priv->mm.interruptible;
+			dev_priv->mm.interruptible = false;
 
-		WARN_ON(i915_gem_object_unbind(obj));
+			WARN_ON(i915_vma_unbind(vma));
 
-		dev_priv->mm.interruptible = was_interruptible;
+			dev_priv->mm.interruptible = was_interruptible;
+		}
 	}
 
 	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
@@ -3968,15 +4111,42 @@
 	i915_gem_object_free(obj);
 }
 
+struct i915_vma *i915_gem_vma_create(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm)
+{
+	struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
+	if (vma == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&vma->vma_link);
+	INIT_LIST_HEAD(&vma->mm_list);
+	INIT_LIST_HEAD(&vma->exec_list);
+	vma->vm = vm;
+	vma->obj = obj;
+
+	/* Keep GGTT vmas first to make debug easier */
+	if (i915_is_ggtt(vm))
+		list_add(&vma->vma_link, &obj->vma_list);
+	else
+		list_add_tail(&vma->vma_link, &obj->vma_list);
+
+	return vma;
+}
+
+void i915_gem_vma_destroy(struct i915_vma *vma)
+{
+	WARN_ON(vma->node.allocated);
+	list_del(&vma->vma_link);
+	kfree(vma);
+}
+
 int
 i915_gem_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	int ret;
 
-	mutex_lock(&dev->struct_mutex);
-
-	if (dev_priv->mm.suspended) {
+	if (dev_priv->ums.mm_suspended) {
 		mutex_unlock(&dev->struct_mutex);
 		return 0;
 	}
@@ -3992,18 +4162,11 @@
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		i915_gem_evict_everything(dev);
 
-	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
-	 * We need to replace this with a semaphore, or something.
-	 * And not confound mm.suspended!
-	 */
-	dev_priv->mm.suspended = 1;
 	del_timer_sync(&dev_priv->gpu_error.hangcheck_timer);
 
 	i915_kernel_lost_context(dev);
 	i915_gem_cleanup_ringbuffer(dev);
 
-	mutex_unlock(&dev->struct_mutex);
-
 	/* Cancel the retire work handler, which should be idle now. */
 	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
 
@@ -4136,8 +4299,8 @@
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
 
-	if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1))
-		I915_WRITE(0x9008, I915_READ(0x9008) | 0xf0000);
+	if (dev_priv->ellc_size)
+		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
 
 	if (HAS_PCH_NOP(dev)) {
 		u32 temp = I915_READ(GEN7_MSG_CTL);
@@ -4213,7 +4376,7 @@
 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
-	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
@@ -4225,7 +4388,7 @@
 	}
 
 	mutex_lock(&dev->struct_mutex);
-	dev_priv->mm.suspended = 0;
+	dev_priv->ums.mm_suspended = 0;
 
 	ret = i915_gem_init_hw(dev);
 	if (ret != 0) {
@@ -4233,7 +4396,7 @@
 		return ret;
 	}
 
-	BUG_ON(!list_empty(&dev_priv->mm.active_list));
+	BUG_ON(!list_empty(&dev_priv->gtt.base.active_list));
 	mutex_unlock(&dev->struct_mutex);
 
 	ret = drm_irq_install(dev);
@@ -4245,7 +4408,7 @@
 cleanup_ringbuffer:
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_cleanup_ringbuffer(dev);
-	dev_priv->mm.suspended = 1;
+	dev_priv->ums.mm_suspended = 1;
 	mutex_unlock(&dev->struct_mutex);
 
 	return ret;
@@ -4255,11 +4418,26 @@
 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return 0;
 
 	drm_irq_uninstall(dev);
-	return i915_gem_idle(dev);
+
+	mutex_lock(&dev->struct_mutex);
+	ret =  i915_gem_idle(dev);
+
+	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
+	 * We need to replace this with a semaphore, or something.
+	 * And not confound ums.mm_suspended!
+	 */
+	if (ret != 0)
+		dev_priv->ums.mm_suspended = 1;
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
 }
 
 void
@@ -4270,9 +4448,11 @@
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return;
 
+	mutex_lock(&dev->struct_mutex);
 	ret = i915_gem_idle(dev);
 	if (ret)
 		DRM_ERROR("failed to idle hardware: %d\n", ret);
+	mutex_unlock(&dev->struct_mutex);
 }
 
 static void
@@ -4282,6 +4462,16 @@
 	INIT_LIST_HEAD(&ring->request_list);
 }
 
+static void i915_init_vm(struct drm_i915_private *dev_priv,
+			 struct i915_address_space *vm)
+{
+	vm->dev = dev_priv->dev;
+	INIT_LIST_HEAD(&vm->active_list);
+	INIT_LIST_HEAD(&vm->inactive_list);
+	INIT_LIST_HEAD(&vm->global_link);
+	list_add(&vm->global_link, &dev_priv->vm_list);
+}
+
 void
 i915_gem_load(struct drm_device *dev)
 {
@@ -4294,8 +4484,9 @@
 				  SLAB_HWCACHE_ALIGN,
 				  NULL);
 
-	INIT_LIST_HEAD(&dev_priv->mm.active_list);
-	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
+	INIT_LIST_HEAD(&dev_priv->vm_list);
+	i915_init_vm(dev_priv, &dev_priv->gtt.base);
+
 	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
 	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
@@ -4594,11 +4785,101 @@
 	list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
 		if (obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list)
+
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		if (obj->active)
+			continue;
+
 		if (obj->pin_count == 0 && obj->pages_pin_count == 0)
 			cnt += obj->base.size >> PAGE_SHIFT;
+	}
 
 	if (unlock)
 		mutex_unlock(&dev->struct_mutex);
 	return cnt;
 }
+
+/* All the new VM stuff */
+unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
+				  struct i915_address_space *vm)
+{
+	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
+	struct i915_vma *vma;
+
+	if (vm == &dev_priv->mm.aliasing_ppgtt->base)
+		vm = &dev_priv->gtt.base;
+
+	BUG_ON(list_empty(&o->vma_list));
+	list_for_each_entry(vma, &o->vma_list, vma_link) {
+		if (vma->vm == vm)
+			return vma->node.start;
+
+	}
+	return -1;
+}
+
+bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
+			struct i915_address_space *vm)
+{
+	struct i915_vma *vma;
+
+	list_for_each_entry(vma, &o->vma_list, vma_link)
+		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
+			return true;
+
+	return false;
+}
+
+bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
+{
+	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
+	struct i915_address_space *vm;
+
+	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
+		if (i915_gem_obj_bound(o, vm))
+			return true;
+
+	return false;
+}
+
+unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
+				struct i915_address_space *vm)
+{
+	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
+	struct i915_vma *vma;
+
+	if (vm == &dev_priv->mm.aliasing_ppgtt->base)
+		vm = &dev_priv->gtt.base;
+
+	BUG_ON(list_empty(&o->vma_list));
+
+	list_for_each_entry(vma, &o->vma_list, vma_link)
+		if (vma->vm == vm)
+			return vma->node.size;
+
+	return 0;
+}
+
+struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
+				     struct i915_address_space *vm)
+{
+	struct i915_vma *vma;
+	list_for_each_entry(vma, &obj->vma_list, vma_link)
+		if (vma->vm == vm)
+			return vma;
+
+	return NULL;
+}
+
+struct i915_vma *
+i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
+				  struct i915_address_space *vm)
+{
+	struct i915_vma *vma;
+
+	vma = i915_gem_obj_to_vma(obj, vm);
+	if (!vma)
+		vma = i915_gem_vma_create(obj, vm);
+
+	return vma;
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 51b7a21..403309c 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -155,7 +155,7 @@
 
 	if (INTEL_INFO(dev)->gen >= 7) {
 		ret = i915_gem_object_set_cache_level(ctx->obj,
-						      I915_CACHE_LLC_MLC);
+						      I915_CACHE_L3_LLC);
 		/* Failure shouldn't ever happen this early */
 		if (WARN_ON(ret))
 			goto err_out;
@@ -214,7 +214,7 @@
 	 * default context.
 	 */
 	dev_priv->ring[RCS].default_context = ctx;
-	ret = i915_gem_object_pin(ctx->obj, CONTEXT_ALIGN, false, false);
+	ret = i915_gem_obj_ggtt_pin(ctx->obj, CONTEXT_ALIGN, false, false);
 	if (ret) {
 		DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
 		goto err_destroy;
@@ -304,31 +304,24 @@
 }
 
 struct i915_ctx_hang_stats *
-i915_gem_context_get_hang_stats(struct intel_ring_buffer *ring,
+i915_gem_context_get_hang_stats(struct drm_device *dev,
 				struct drm_file *file,
 				u32 id)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct i915_hw_context *to;
-
-	if (dev_priv->hw_contexts_disabled)
-		return ERR_PTR(-ENOENT);
-
-	if (ring->id != RCS)
-		return ERR_PTR(-EINVAL);
-
-	if (file == NULL)
-		return ERR_PTR(-EINVAL);
+	struct i915_hw_context *ctx;
 
 	if (id == DEFAULT_CONTEXT_ID)
 		return &file_priv->hang_stats;
 
-	to = i915_gem_context_get(file->driver_priv, id);
-	if (to == NULL)
+	ctx = NULL;
+	if (!dev_priv->hw_contexts_disabled)
+		ctx = i915_gem_context_get(file->driver_priv, id);
+	if (ctx == NULL)
 		return ERR_PTR(-ENOENT);
 
-	return &to->hang_stats;
+	return &ctx->hang_stats;
 }
 
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
@@ -377,7 +370,7 @@
 
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_SET_CONTEXT);
-	intel_ring_emit(ring, new_context->obj->gtt_offset |
+	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(new_context->obj) |
 			MI_MM_SPACE_GTT |
 			MI_SAVE_EXT_STATE_EN |
 			MI_RESTORE_EXT_STATE_EN |
@@ -407,7 +400,7 @@
 	if (from == to)
 		return 0;
 
-	ret = i915_gem_object_pin(to->obj, CONTEXT_ALIGN, false, false);
+	ret = i915_gem_obj_ggtt_pin(to->obj, CONTEXT_ALIGN, false, false);
 	if (ret)
 		return ret;
 
@@ -443,7 +436,10 @@
 	 * MI_SET_CONTEXT instead of when the next seqno has completed.
 	 */
 	if (from != NULL) {
+		struct drm_i915_private *dev_priv = from->obj->base.dev->dev_private;
+		struct i915_address_space *ggtt = &dev_priv->gtt.base;
 		from->obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+		list_move_tail(&i915_gem_obj_to_vma(from->obj, ggtt)->mm_list, &ggtt->active_list);
 		i915_gem_object_move_to_active(from->obj, ring);
 		/* As long as MI_SET_CONTEXT is serializing, ie. it flushes the
 		 * whole damn pipeline, we don't need to explicitly mark the
diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c
index 582e6a5..775d506 100644
--- a/drivers/gpu/drm/i915/i915_gem_debug.c
+++ b/drivers/gpu/drm/i915/i915_gem_debug.c
@@ -97,7 +97,7 @@
 		}
 	}
 
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, list) {
+	list_for_each_entry(obj, &i915_gtt_vm->inactive_list, list) {
 		if (obj->base.dev != dev ||
 		    !atomic_read(&obj->base.refcount.refcount)) {
 			DRM_ERROR("freed inactive %p\n", obj);
@@ -115,73 +115,4 @@
 
 	return warned = err;
 }
-#endif /* WATCH_INACTIVE */
-
-#if WATCH_COHERENCY
-void
-i915_gem_object_check_coherency(struct drm_i915_gem_object *obj, int handle)
-{
-	struct drm_device *dev = obj->base.dev;
-	int page;
-	uint32_t *gtt_mapping;
-	uint32_t *backing_map = NULL;
-	int bad_count = 0;
-
-	DRM_INFO("%s: checking coherency of object %p@0x%08x (%d, %zdkb):\n",
-		 __func__, obj, obj->gtt_offset, handle,
-		 obj->size / 1024);
-
-	gtt_mapping = ioremap(dev_priv->mm.gtt_base_addr + obj->gtt_offset,
-			      obj->base.size);
-	if (gtt_mapping == NULL) {
-		DRM_ERROR("failed to map GTT space\n");
-		return;
-	}
-
-	for (page = 0; page < obj->size / PAGE_SIZE; page++) {
-		int i;
-
-		backing_map = kmap_atomic(obj->pages[page]);
-
-		if (backing_map == NULL) {
-			DRM_ERROR("failed to map backing page\n");
-			goto out;
-		}
-
-		for (i = 0; i < PAGE_SIZE / 4; i++) {
-			uint32_t cpuval = backing_map[i];
-			uint32_t gttval = readl(gtt_mapping +
-						page * 1024 + i);
-
-			if (cpuval != gttval) {
-				DRM_INFO("incoherent CPU vs GPU at 0x%08x: "
-					 "0x%08x vs 0x%08x\n",
-					 (int)(obj->gtt_offset +
-					       page * PAGE_SIZE + i * 4),
-					 cpuval, gttval);
-				if (bad_count++ >= 8) {
-					DRM_INFO("...\n");
-					goto out;
-				}
-			}
-		}
-		kunmap_atomic(backing_map);
-		backing_map = NULL;
-	}
-
- out:
-	if (backing_map != NULL)
-		kunmap_atomic(backing_map);
-	iounmap(gtt_mapping);
-
-	/* give syslog time to catch up */
-	msleep(1);
-
-	/* Directly flush the object, since we just loaded values with the CPU
-	 * from the backing pages and we don't want to disturb the cache
-	 * management that we're trying to observe.
-	 */
-
-	i915_gem_clflush_object(obj);
-}
-#endif
+#endif /* WATCH_LIST */
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index dc53a52..e918b05 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -27,10 +27,15 @@
 #include "i915_drv.h"
 #include <linux/dma-buf.h>
 
+static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
+{
+	return to_intel_bo(buf->priv);
+}
+
 static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
 					     enum dma_data_direction dir)
 {
-	struct drm_i915_gem_object *obj = attachment->dmabuf->priv;
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
 	struct sg_table *st;
 	struct scatterlist *src, *dst;
 	int ret, i;
@@ -85,25 +90,22 @@
 				   struct sg_table *sg,
 				   enum dma_data_direction dir)
 {
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
+
+	mutex_lock(&obj->base.dev->struct_mutex);
+
 	dma_unmap_sg(attachment->dev, sg->sgl, sg->nents, dir);
 	sg_free_table(sg);
 	kfree(sg);
-}
 
-static void i915_gem_dmabuf_release(struct dma_buf *dma_buf)
-{
-	struct drm_i915_gem_object *obj = dma_buf->priv;
+	i915_gem_object_unpin_pages(obj);
 
-	if (obj->base.export_dma_buf == dma_buf) {
-		/* drop the reference on the export fd holds */
-		obj->base.export_dma_buf = NULL;
-		drm_gem_object_unreference_unlocked(&obj->base);
-	}
+	mutex_unlock(&obj->base.dev->struct_mutex);
 }
 
 static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
 {
-	struct drm_i915_gem_object *obj = dma_buf->priv;
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
 	struct sg_page_iter sg_iter;
 	struct page **pages;
@@ -151,7 +153,7 @@
 
 static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
 {
-	struct drm_i915_gem_object *obj = dma_buf->priv;
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
 	int ret;
 
@@ -194,7 +196,7 @@
 
 static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, size_t start, size_t length, enum dma_data_direction direction)
 {
-	struct drm_i915_gem_object *obj = dma_buf->priv;
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
 	struct drm_device *dev = obj->base.dev;
 	int ret;
 	bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
@@ -211,7 +213,7 @@
 static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.map_dma_buf = i915_gem_map_dma_buf,
 	.unmap_dma_buf = i915_gem_unmap_dma_buf,
-	.release = i915_gem_dmabuf_release,
+	.release = drm_gem_dmabuf_release,
 	.kmap = i915_gem_dmabuf_kmap,
 	.kmap_atomic = i915_gem_dmabuf_kmap_atomic,
 	.kunmap = i915_gem_dmabuf_kunmap,
@@ -225,9 +227,7 @@
 struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 				      struct drm_gem_object *gem_obj, int flags)
 {
-	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
-
-	return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, flags);
+	return dma_buf_export(gem_obj, &i915_dmabuf_ops, gem_obj->size, flags);
 }
 
 static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
@@ -264,7 +264,7 @@
 
 	/* is this one of own objects? */
 	if (dma_buf->ops == &i915_dmabuf_ops) {
-		obj = dma_buf->priv;
+		obj = dma_buf_to_obj(dma_buf);
 		/* is it from our device? */
 		if (obj->base.dev == dev) {
 			/*
@@ -289,12 +289,7 @@
 		goto fail_detach;
 	}
 
-	ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
-	if (ret) {
-		i915_gem_object_free(obj);
-		goto fail_detach;
-	}
-
+	drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
 	i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops);
 	obj->base.import_attach = attach;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index c86d5d9..91b7001 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -32,23 +32,23 @@
 #include "i915_trace.h"
 
 static bool
-mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
+mark_free(struct i915_vma *vma, struct list_head *unwind)
 {
-	if (obj->pin_count)
+	if (vma->obj->pin_count)
 		return false;
 
-	list_add(&obj->exec_list, unwind);
-	return drm_mm_scan_add_block(obj->gtt_space);
+	list_add(&vma->exec_list, unwind);
+	return drm_mm_scan_add_block(&vma->node);
 }
 
 int
-i915_gem_evict_something(struct drm_device *dev, int min_size,
-			 unsigned alignment, unsigned cache_level,
+i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
+			 int min_size, unsigned alignment, unsigned cache_level,
 			 bool mappable, bool nonblocking)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct list_head eviction_list, unwind_list;
-	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
 	int ret = 0;
 
 	trace_i915_gem_evict(dev, min_size, alignment, mappable);
@@ -77,17 +77,17 @@
 	 */
 
 	INIT_LIST_HEAD(&unwind_list);
-	if (mappable)
-		drm_mm_init_scan_with_range(&dev_priv->mm.gtt_space,
-					    min_size, alignment, cache_level,
-					    0, dev_priv->gtt.mappable_end);
-	else
-		drm_mm_init_scan(&dev_priv->mm.gtt_space,
-				 min_size, alignment, cache_level);
+	if (mappable) {
+		BUG_ON(!i915_is_ggtt(vm));
+		drm_mm_init_scan_with_range(&vm->mm, min_size,
+					    alignment, cache_level, 0,
+					    dev_priv->gtt.mappable_end);
+	} else
+		drm_mm_init_scan(&vm->mm, min_size, alignment, cache_level);
 
 	/* First see if there is a large enough contiguous idle region... */
-	list_for_each_entry(obj, &dev_priv->mm.inactive_list, mm_list) {
-		if (mark_free(obj, &unwind_list))
+	list_for_each_entry(vma, &vm->inactive_list, mm_list) {
+		if (mark_free(vma, &unwind_list))
 			goto found;
 	}
 
@@ -95,22 +95,21 @@
 		goto none;
 
 	/* Now merge in the soon-to-be-expired objects... */
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		if (mark_free(obj, &unwind_list))
+	list_for_each_entry(vma, &vm->active_list, mm_list) {
+		if (mark_free(vma, &unwind_list))
 			goto found;
 	}
 
 none:
 	/* Nothing found, clean up and bail out! */
 	while (!list_empty(&unwind_list)) {
-		obj = list_first_entry(&unwind_list,
-				       struct drm_i915_gem_object,
+		vma = list_first_entry(&unwind_list,
+				       struct i915_vma,
 				       exec_list);
-
-		ret = drm_mm_scan_remove_block(obj->gtt_space);
+		ret = drm_mm_scan_remove_block(&vma->node);
 		BUG_ON(ret);
 
-		list_del_init(&obj->exec_list);
+		list_del_init(&vma->exec_list);
 	}
 
 	/* We expect the caller to unpin, evict all and try again, or give up.
@@ -124,27 +123,30 @@
 	 * temporary list. */
 	INIT_LIST_HEAD(&eviction_list);
 	while (!list_empty(&unwind_list)) {
-		obj = list_first_entry(&unwind_list,
-				       struct drm_i915_gem_object,
+		vma = list_first_entry(&unwind_list,
+				       struct i915_vma,
 				       exec_list);
-		if (drm_mm_scan_remove_block(obj->gtt_space)) {
-			list_move(&obj->exec_list, &eviction_list);
-			drm_gem_object_reference(&obj->base);
+		if (drm_mm_scan_remove_block(&vma->node)) {
+			list_move(&vma->exec_list, &eviction_list);
+			drm_gem_object_reference(&vma->obj->base);
 			continue;
 		}
-		list_del_init(&obj->exec_list);
+		list_del_init(&vma->exec_list);
 	}
 
 	/* Unbinding will emit any required flushes */
 	while (!list_empty(&eviction_list)) {
-		obj = list_first_entry(&eviction_list,
-				       struct drm_i915_gem_object,
+		struct drm_gem_object *obj;
+		vma = list_first_entry(&eviction_list,
+				       struct i915_vma,
 				       exec_list);
-		if (ret == 0)
-			ret = i915_gem_object_unbind(obj);
 
-		list_del_init(&obj->exec_list);
-		drm_gem_object_unreference(&obj->base);
+		obj =  &vma->obj->base;
+		list_del_init(&vma->exec_list);
+		if (ret == 0)
+			ret = i915_vma_unbind(vma);
+
+		drm_gem_object_unreference(obj);
 	}
 
 	return ret;
@@ -154,12 +156,18 @@
 i915_gem_evict_everything(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj, *next;
-	bool lists_empty;
+	struct i915_address_space *vm;
+	struct i915_vma *vma, *next;
+	bool lists_empty = true;
 	int ret;
 
-	lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
-		       list_empty(&dev_priv->mm.active_list));
+	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+		lists_empty = (list_empty(&vm->inactive_list) &&
+			       list_empty(&vm->active_list));
+		if (!lists_empty)
+			lists_empty = false;
+	}
+
 	if (lists_empty)
 		return -ENOSPC;
 
@@ -176,10 +184,11 @@
 	i915_gem_retire_requests(dev);
 
 	/* Having flushed everything, unbind() should never raise an error */
-	list_for_each_entry_safe(obj, next,
-				 &dev_priv->mm.inactive_list, mm_list)
-		if (obj->pin_count == 0)
-			WARN_ON(i915_gem_object_unbind(obj));
+	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+		list_for_each_entry_safe(vma, next, &vm->inactive_list, mm_list)
+			if (vma->obj->pin_count == 0)
+				WARN_ON(i915_vma_unbind(vma));
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 87a3227..792c52a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -172,9 +172,60 @@
 }
 
 static int
+relocate_entry_cpu(struct drm_i915_gem_object *obj,
+		   struct drm_i915_gem_relocation_entry *reloc)
+{
+	uint32_t page_offset = offset_in_page(reloc->offset);
+	char *vaddr;
+	int ret = -EINVAL;
+
+	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
+	if (ret)
+		return ret;
+
+	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+				reloc->offset >> PAGE_SHIFT));
+	*(uint32_t *)(vaddr + page_offset) = reloc->delta;
+	kunmap_atomic(vaddr);
+
+	return 0;
+}
+
+static int
+relocate_entry_gtt(struct drm_i915_gem_object *obj,
+		   struct drm_i915_gem_relocation_entry *reloc)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t __iomem *reloc_entry;
+	void __iomem *reloc_page;
+	int ret = -EINVAL;
+
+	ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret)
+		return ret;
+
+	ret = i915_gem_object_put_fence(obj);
+	if (ret)
+		return ret;
+
+	/* Map the page containing the relocation we're going to perform.  */
+	reloc->offset += i915_gem_obj_ggtt_offset(obj);
+	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
+			reloc->offset & PAGE_MASK);
+	reloc_entry = (uint32_t __iomem *)
+		(reloc_page + offset_in_page(reloc->offset));
+	iowrite32(reloc->delta, reloc_entry);
+	io_mapping_unmap_atomic(reloc_page);
+
+	return 0;
+}
+
+static int
 i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
 				   struct eb_objects *eb,
-				   struct drm_i915_gem_relocation_entry *reloc)
+				   struct drm_i915_gem_relocation_entry *reloc,
+				   struct i915_address_space *vm)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_gem_object *target_obj;
@@ -188,7 +239,7 @@
 		return -ENOENT;
 
 	target_i915_obj = to_intel_bo(target_obj);
-	target_offset = target_i915_obj->gtt_offset;
+	target_offset = i915_gem_obj_ggtt_offset(target_i915_obj);
 
 	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
 	 * pipe_control writes because the gpu doesn't properly redirect them
@@ -254,40 +305,10 @@
 		return -EFAULT;
 
 	reloc->delta += target_offset;
-	if (use_cpu_reloc(obj)) {
-		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
-		char *vaddr;
-
-		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
-		if (ret)
-			return ret;
-
-		vaddr = kmap_atomic(i915_gem_object_get_page(obj,
-							     reloc->offset >> PAGE_SHIFT));
-		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
-		kunmap_atomic(vaddr);
-	} else {
-		struct drm_i915_private *dev_priv = dev->dev_private;
-		uint32_t __iomem *reloc_entry;
-		void __iomem *reloc_page;
-
-		ret = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (ret)
-			return ret;
-
-		ret = i915_gem_object_put_fence(obj);
-		if (ret)
-			return ret;
-
-		/* Map the page containing the relocation we're going to perform.  */
-		reloc->offset += obj->gtt_offset;
-		reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
-						      reloc->offset & PAGE_MASK);
-		reloc_entry = (uint32_t __iomem *)
-			(reloc_page + (reloc->offset & ~PAGE_MASK));
-		iowrite32(reloc->delta, reloc_entry);
-		io_mapping_unmap_atomic(reloc_page);
-	}
+	if (use_cpu_reloc(obj))
+		ret = relocate_entry_cpu(obj, reloc);
+	else
+		ret = relocate_entry_gtt(obj, reloc);
 
 	/* and update the user's relocation entry */
 	reloc->presumed_offset = target_offset;
@@ -297,7 +318,8 @@
 
 static int
 i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
-				    struct eb_objects *eb)
+				    struct eb_objects *eb,
+				    struct i915_address_space *vm)
 {
 #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
 	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
@@ -321,7 +343,8 @@
 		do {
 			u64 offset = r->presumed_offset;
 
-			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
+			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r,
+								 vm);
 			if (ret)
 				return ret;
 
@@ -344,13 +367,15 @@
 static int
 i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
 					 struct eb_objects *eb,
-					 struct drm_i915_gem_relocation_entry *relocs)
+					 struct drm_i915_gem_relocation_entry *relocs,
+					 struct i915_address_space *vm)
 {
 	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 	int i, ret;
 
 	for (i = 0; i < entry->relocation_count; i++) {
-		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
+		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i],
+							 vm);
 		if (ret)
 			return ret;
 	}
@@ -359,7 +384,8 @@
 }
 
 static int
-i915_gem_execbuffer_relocate(struct eb_objects *eb)
+i915_gem_execbuffer_relocate(struct eb_objects *eb,
+			     struct i915_address_space *vm)
 {
 	struct drm_i915_gem_object *obj;
 	int ret = 0;
@@ -373,7 +399,7 @@
 	 */
 	pagefault_disable();
 	list_for_each_entry(obj, &eb->objects, exec_list) {
-		ret = i915_gem_execbuffer_relocate_object(obj, eb);
+		ret = i915_gem_execbuffer_relocate_object(obj, eb, vm);
 		if (ret)
 			break;
 	}
@@ -395,6 +421,7 @@
 static int
 i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
 				   struct intel_ring_buffer *ring,
+				   struct i915_address_space *vm,
 				   bool *need_reloc)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
@@ -409,7 +436,8 @@
 		obj->tiling_mode != I915_TILING_NONE;
 	need_mappable = need_fence || need_reloc_mappable(obj);
 
-	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
+	ret = i915_gem_object_pin(obj, vm, entry->alignment, need_mappable,
+				  false);
 	if (ret)
 		return ret;
 
@@ -436,8 +464,8 @@
 		obj->has_aliasing_ppgtt_mapping = 1;
 	}
 
-	if (entry->offset != obj->gtt_offset) {
-		entry->offset = obj->gtt_offset;
+	if (entry->offset != i915_gem_obj_offset(obj, vm)) {
+		entry->offset = i915_gem_obj_offset(obj, vm);
 		*need_reloc = true;
 	}
 
@@ -458,7 +486,7 @@
 {
 	struct drm_i915_gem_exec_object2 *entry;
 
-	if (!obj->gtt_space)
+	if (!i915_gem_obj_bound_any(obj))
 		return;
 
 	entry = obj->exec_entry;
@@ -475,6 +503,7 @@
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct list_head *objects,
+			    struct i915_address_space *vm,
 			    bool *need_relocs)
 {
 	struct drm_i915_gem_object *obj;
@@ -529,31 +558,37 @@
 		list_for_each_entry(obj, objects, exec_list) {
 			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 			bool need_fence, need_mappable;
+			u32 obj_offset;
 
-			if (!obj->gtt_space)
+			if (!i915_gem_obj_bound(obj, vm))
 				continue;
 
+			obj_offset = i915_gem_obj_offset(obj, vm);
 			need_fence =
 				has_fenced_gpu_access &&
 				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 				obj->tiling_mode != I915_TILING_NONE;
 			need_mappable = need_fence || need_reloc_mappable(obj);
 
-			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
+			WARN_ON((need_mappable || need_fence) &&
+				!i915_is_ggtt(vm));
+
+			if ((entry->alignment &&
+			     obj_offset & (entry->alignment - 1)) ||
 			    (need_mappable && !obj->map_and_fenceable))
-				ret = i915_gem_object_unbind(obj);
+				ret = i915_vma_unbind(i915_gem_obj_to_vma(obj, vm));
 			else
-				ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
+				ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
 			if (ret)
 				goto err;
 		}
 
 		/* Bind fresh objects */
 		list_for_each_entry(obj, objects, exec_list) {
-			if (obj->gtt_space)
+			if (i915_gem_obj_bound(obj, vm))
 				continue;
 
-			ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
+			ret = i915_gem_execbuffer_reserve_object(obj, ring, vm, need_relocs);
 			if (ret)
 				goto err;
 		}
@@ -577,7 +612,8 @@
 				  struct drm_file *file,
 				  struct intel_ring_buffer *ring,
 				  struct eb_objects *eb,
-				  struct drm_i915_gem_exec_object2 *exec)
+				  struct drm_i915_gem_exec_object2 *exec,
+				  struct i915_address_space *vm)
 {
 	struct drm_i915_gem_relocation_entry *reloc;
 	struct drm_i915_gem_object *obj;
@@ -661,14 +697,15 @@
 		goto err;
 
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
-	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs);
+	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs);
 	if (ret)
 		goto err;
 
 	list_for_each_entry(obj, &eb->objects, exec_list) {
 		int offset = obj->exec_entry - exec;
 		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
-							       reloc + reloc_offset[offset]);
+							       reloc + reloc_offset[offset],
+							       vm);
 		if (ret)
 			goto err;
 	}
@@ -691,6 +728,7 @@
 {
 	struct drm_i915_gem_object *obj;
 	uint32_t flush_domains = 0;
+	bool flush_chipset = false;
 	int ret;
 
 	list_for_each_entry(obj, objects, exec_list) {
@@ -699,12 +737,12 @@
 			return ret;
 
 		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
-			i915_gem_clflush_object(obj);
+			flush_chipset |= i915_gem_clflush_object(obj, false);
 
 		flush_domains |= obj->base.write_domain;
 	}
 
-	if (flush_domains & I915_GEM_DOMAIN_CPU)
+	if (flush_chipset)
 		i915_gem_chipset_flush(ring->dev);
 
 	if (flush_domains & I915_GEM_DOMAIN_GTT)
@@ -758,8 +796,10 @@
 		if (!access_ok(VERIFY_WRITE, ptr, length))
 			return -EFAULT;
 
-		if (fault_in_multipages_readable(ptr, length))
-			return -EFAULT;
+		if (likely(!i915_prefault_disable)) {
+			if (fault_in_multipages_readable(ptr, length))
+				return -EFAULT;
+		}
 	}
 
 	return 0;
@@ -767,6 +807,7 @@
 
 static void
 i915_gem_execbuffer_move_to_active(struct list_head *objects,
+				   struct i915_address_space *vm,
 				   struct intel_ring_buffer *ring)
 {
 	struct drm_i915_gem_object *obj;
@@ -781,6 +822,8 @@
 		obj->base.read_domains = obj->base.pending_read_domains;
 		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
+		/* FIXME: This lookup gets fixed later <-- danvet */
+		list_move_tail(&i915_gem_obj_to_vma(obj, vm)->mm_list, &vm->active_list);
 		i915_gem_object_move_to_active(obj, ring);
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
@@ -835,7 +878,8 @@
 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec)
+		       struct drm_i915_gem_exec_object2 *exec,
+		       struct i915_address_space *vm)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct eb_objects *eb;
@@ -872,7 +916,7 @@
 		break;
 	case I915_EXEC_BSD:
 		ring = &dev_priv->ring[VCS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;
@@ -880,7 +924,7 @@
 		break;
 	case I915_EXEC_BLT:
 		ring = &dev_priv->ring[BCS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;
@@ -888,7 +932,7 @@
 		break;
 	case I915_EXEC_VEBOX:
 		ring = &dev_priv->ring[VECS];
-		if (ctx_id != 0) {
+		if (ctx_id != DEFAULT_CONTEXT_ID) {
 			DRM_DEBUG("Ring %s doesn't support contexts\n",
 				  ring->name);
 			return -EPERM;
@@ -972,7 +1016,7 @@
 	if (ret)
 		goto pre_mutex_err;
 
-	if (dev_priv->mm.suspended) {
+	if (dev_priv->ums.mm_suspended) {
 		mutex_unlock(&dev->struct_mutex);
 		ret = -EBUSY;
 		goto pre_mutex_err;
@@ -997,17 +1041,17 @@
 
 	/* Move the objects en-masse into the GTT, evicting if necessary. */
 	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
-	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs);
+	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, vm, &need_relocs);
 	if (ret)
 		goto err;
 
 	/* The objects are in their final locations, apply the relocations. */
 	if (need_relocs)
-		ret = i915_gem_execbuffer_relocate(eb);
+		ret = i915_gem_execbuffer_relocate(eb, vm);
 	if (ret) {
 		if (ret == -EFAULT) {
 			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
-								eb, exec);
+								eb, exec, vm);
 			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
 		}
 		if (ret)
@@ -1058,7 +1102,8 @@
 			goto err;
 	}
 
-	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
+	exec_start = i915_gem_obj_offset(batch_obj, vm) +
+		args->batch_start_offset;
 	exec_len = args->batch_len;
 	if (cliprects) {
 		for (i = 0; i < args->num_cliprects; i++) {
@@ -1083,7 +1128,7 @@
 
 	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
 
-	i915_gem_execbuffer_move_to_active(&eb->objects, ring);
+	i915_gem_execbuffer_move_to_active(&eb->objects, vm, ring);
 	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
 
 err:
@@ -1104,6 +1149,7 @@
 i915_gem_execbuffer(struct drm_device *dev, void *data,
 		    struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_execbuffer *args = data;
 	struct drm_i915_gem_execbuffer2 exec2;
 	struct drm_i915_gem_exec_object *exec_list = NULL;
@@ -1159,7 +1205,8 @@
 	exec2.flags = I915_EXEC_RENDER;
 	i915_execbuffer2_set_context_id(exec2, 0);
 
-	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
+	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list,
+				     &dev_priv->gtt.base);
 	if (!ret) {
 		/* Copy the new buffer offsets back to the user's exec list. */
 		for (i = 0; i < args->buffer_count; i++)
@@ -1185,6 +1232,7 @@
 i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		     struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
 	int ret;
@@ -1215,7 +1263,8 @@
 		return -EFAULT;
 	}
 
-	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
+	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list,
+				     &dev_priv->gtt.base);
 	if (!ret) {
 		/* Copy the new buffer offsets back to the user's exec list. */
 		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 5101ab6..212f6d8 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -28,8 +28,12 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+#define GEN6_PPGTT_PD_ENTRIES 512
+#define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+
 /* PPGTT stuff */
 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
+#define HSW_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0x7f0))
 
 #define GEN6_PDE_VALID			(1 << 0)
 /* gen6+ has bit 11-4 for physical addr bit 39-32 */
@@ -39,19 +43,50 @@
 #define GEN6_PTE_UNCACHED		(1 << 1)
 #define HSW_PTE_UNCACHED		(0)
 #define GEN6_PTE_CACHE_LLC		(2 << 1)
-#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
+#define GEN7_PTE_CACHE_L3_LLC		(3 << 1)
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
+#define HSW_PTE_ADDR_ENCODE(addr)	HSW_GTT_ADDR_ENCODE(addr)
 
-static gen6_gtt_pte_t gen6_pte_encode(struct drm_device *dev,
-				      dma_addr_t addr,
-				      enum i915_cache_level level)
+/* Cacheability Control is a 4-bit value. The low three bits are stored in *
+ * bits 3:1 of the PTE, while the fourth bit is stored in bit 11 of the PTE.
+ */
+#define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
+					 (((bits) & 0x8) << (11 - 3)))
+#define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
+#define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
+#define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
+
+static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
+				     enum i915_cache_level level)
 {
 	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
 	pte |= GEN6_PTE_ADDR_ENCODE(addr);
 
 	switch (level) {
-	case I915_CACHE_LLC_MLC:
-		pte |= GEN6_PTE_CACHE_LLC_MLC;
+	case I915_CACHE_L3_LLC:
+	case I915_CACHE_LLC:
+		pte |= GEN6_PTE_CACHE_LLC;
+		break;
+	case I915_CACHE_NONE:
+		pte |= GEN6_PTE_UNCACHED;
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	return pte;
+}
+
+static gen6_gtt_pte_t ivb_pte_encode(dma_addr_t addr,
+				     enum i915_cache_level level)
+{
+	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
+	pte |= GEN6_PTE_ADDR_ENCODE(addr);
+
+	switch (level) {
+	case I915_CACHE_L3_LLC:
+		pte |= GEN7_PTE_CACHE_L3_LLC;
 		break;
 	case I915_CACHE_LLC:
 		pte |= GEN6_PTE_CACHE_LLC;
@@ -60,7 +95,7 @@
 		pte |= GEN6_PTE_UNCACHED;
 		break;
 	default:
-		BUG();
+		WARN_ON(1);
 	}
 
 	return pte;
@@ -69,8 +104,7 @@
 #define BYT_PTE_WRITEABLE		(1 << 1)
 #define BYT_PTE_SNOOPED_BY_CPU_CACHES	(1 << 2)
 
-static gen6_gtt_pte_t byt_pte_encode(struct drm_device *dev,
-				     dma_addr_t addr,
+static gen6_gtt_pte_t byt_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level)
 {
 	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
@@ -87,22 +121,41 @@
 	return pte;
 }
 
-static gen6_gtt_pte_t hsw_pte_encode(struct drm_device *dev,
-				     dma_addr_t addr,
+static gen6_gtt_pte_t hsw_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level)
 {
 	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
-	pte |= GEN6_PTE_ADDR_ENCODE(addr);
+	pte |= HSW_PTE_ADDR_ENCODE(addr);
 
 	if (level != I915_CACHE_NONE)
-		pte |= GEN6_PTE_CACHE_LLC;
+		pte |= HSW_WB_LLC_AGE3;
+
+	return pte;
+}
+
+static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
+				      enum i915_cache_level level)
+{
+	gen6_gtt_pte_t pte = GEN6_PTE_VALID;
+	pte |= HSW_PTE_ADDR_ENCODE(addr);
+
+	switch (level) {
+	case I915_CACHE_NONE:
+		break;
+	case I915_CACHE_WT:
+		pte |= HSW_WT_ELLC_LLC_AGE0;
+		break;
+	default:
+		pte |= HSW_WB_ELLC_LLC_AGE0;
+		break;
+	}
 
 	return pte;
 }
 
 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_i915_private *dev_priv = ppgtt->dev->dev_private;
+	struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
 	gen6_gtt_pte_t __iomem *pd_addr;
 	uint32_t pd_entry;
 	int i;
@@ -181,18 +234,18 @@
 }
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
-static void gen6_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
 				   unsigned first_entry,
 				   unsigned num_entries)
 {
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr, scratch_pte;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
 	unsigned last_pte, i;
 
-	scratch_pte = ppgtt->pte_encode(ppgtt->dev,
-					ppgtt->scratch_page_dma_addr,
-					I915_CACHE_LLC);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC);
 
 	while (num_entries) {
 		last_pte = first_pte + num_entries;
@@ -212,11 +265,13 @@
 	}
 }
 
-static void gen6_ppgtt_insert_entries(struct i915_hw_ppgtt *ppgtt,
+static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
 				      struct sg_table *pages,
 				      unsigned first_entry,
 				      enum i915_cache_level cache_level)
 {
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	gen6_gtt_pte_t *pt_vaddr;
 	unsigned act_pt = first_entry / I915_PPGTT_PT_ENTRIES;
 	unsigned act_pte = first_entry % I915_PPGTT_PT_ENTRIES;
@@ -227,8 +282,7 @@
 		dma_addr_t page_addr;
 
 		page_addr = sg_page_iter_dma_address(&sg_iter);
-		pt_vaddr[act_pte] = ppgtt->pte_encode(ppgtt->dev, page_addr,
-						      cache_level);
+		pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level);
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			kunmap_atomic(pt_vaddr);
 			act_pt++;
@@ -240,13 +294,17 @@
 	kunmap_atomic(pt_vaddr);
 }
 
-static void gen6_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
+static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 {
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
 	int i;
 
+	drm_mm_takedown(&ppgtt->base.mm);
+
 	if (ppgtt->pt_dma_addr) {
 		for (i = 0; i < ppgtt->num_pd_entries; i++)
-			pci_unmap_page(ppgtt->dev->pdev,
+			pci_unmap_page(ppgtt->base.dev->pdev,
 				       ppgtt->pt_dma_addr[i],
 				       4096, PCI_DMA_BIDIRECTIONAL);
 	}
@@ -260,7 +318,7 @@
 
 static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 {
-	struct drm_device *dev = ppgtt->dev;
+	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned first_pd_entry_in_global_pt;
 	int i;
@@ -271,18 +329,13 @@
 	 * now. */
 	first_pd_entry_in_global_pt = gtt_total_entries(dev_priv->gtt);
 
-	if (IS_HASWELL(dev)) {
-		ppgtt->pte_encode = hsw_pte_encode;
-	} else if (IS_VALLEYVIEW(dev)) {
-		ppgtt->pte_encode = byt_pte_encode;
-	} else {
-		ppgtt->pte_encode = gen6_pte_encode;
-	}
-	ppgtt->num_pd_entries = I915_PPGTT_PD_ENTRIES;
+	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
+	ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
 	ppgtt->enable = gen6_ppgtt_enable;
-	ppgtt->clear_range = gen6_ppgtt_clear_range;
-	ppgtt->insert_entries = gen6_ppgtt_insert_entries;
-	ppgtt->cleanup = gen6_ppgtt_cleanup;
+	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
+	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
+	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
+	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
@@ -313,8 +366,8 @@
 		ppgtt->pt_dma_addr[i] = pt_addr;
 	}
 
-	ppgtt->clear_range(ppgtt, 0,
-			   ppgtt->num_pd_entries*I915_PPGTT_PT_ENTRIES);
+	ppgtt->base.clear_range(&ppgtt->base, 0,
+				ppgtt->num_pd_entries * I915_PPGTT_PT_ENTRIES);
 
 	ppgtt->pd_offset = first_pd_entry_in_global_pt * sizeof(gen6_gtt_pte_t);
 
@@ -347,8 +400,7 @@
 	if (!ppgtt)
 		return -ENOMEM;
 
-	ppgtt->dev = dev;
-	ppgtt->scratch_page_dma_addr = dev_priv->gtt.scratch_page_dma;
+	ppgtt->base.dev = dev;
 
 	if (INTEL_INFO(dev)->gen < 8)
 		ret = gen6_ppgtt_init(ppgtt);
@@ -357,8 +409,11 @@
 
 	if (ret)
 		kfree(ppgtt);
-	else
+	else {
 		dev_priv->mm.aliasing_ppgtt = ppgtt;
+		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
+			    ppgtt->base.total);
+	}
 
 	return ret;
 }
@@ -371,7 +426,7 @@
 	if (!ppgtt)
 		return;
 
-	ppgtt->cleanup(ppgtt);
+	ppgtt->base.cleanup(&ppgtt->base);
 	dev_priv->mm.aliasing_ppgtt = NULL;
 }
 
@@ -379,17 +434,17 @@
 			    struct drm_i915_gem_object *obj,
 			    enum i915_cache_level cache_level)
 {
-	ppgtt->insert_entries(ppgtt, obj->pages,
-			      obj->gtt_space->start >> PAGE_SHIFT,
-			      cache_level);
+	ppgtt->base.insert_entries(&ppgtt->base, obj->pages,
+				   i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
+				   cache_level);
 }
 
 void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
 			      struct drm_i915_gem_object *obj)
 {
-	ppgtt->clear_range(ppgtt,
-			   obj->gtt_space->start >> PAGE_SHIFT,
-			   obj->base.size >> PAGE_SHIFT);
+	ppgtt->base.clear_range(&ppgtt->base,
+				i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT,
+				obj->base.size >> PAGE_SHIFT);
 }
 
 extern int intel_iommu_gfx_mapped;
@@ -436,11 +491,12 @@
 	struct drm_i915_gem_object *obj;
 
 	/* First fill our portion of the GTT with scratch pages */
-	dev_priv->gtt.gtt_clear_range(dev, dev_priv->gtt.start / PAGE_SIZE,
-				      dev_priv->gtt.total / PAGE_SIZE);
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       dev_priv->gtt.base.start / PAGE_SIZE,
+				       dev_priv->gtt.base.total / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		i915_gem_clflush_object(obj);
+		i915_gem_clflush_object(obj, obj->pin_display);
 		i915_gem_gtt_bind_object(obj, obj->cache_level);
 	}
 
@@ -466,12 +522,12 @@
  * within the global GTT as well as accessible by the GPU through the GMADR
  * mapped BAR (dev_priv->mm.gtt->gtt).
  */
-static void gen6_ggtt_insert_entries(struct drm_device *dev,
+static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int first_entry,
 				     enum i915_cache_level level)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen6_gtt_pte_t __iomem *gtt_entries =
 		(gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
 	int i = 0;
@@ -480,8 +536,7 @@
 
 	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
 		addr = sg_page_iter_dma_address(&sg_iter);
-		iowrite32(dev_priv->gtt.pte_encode(dev, addr, level),
-			  &gtt_entries[i]);
+		iowrite32(vm->pte_encode(addr, level), &gtt_entries[i]);
 		i++;
 	}
 
@@ -492,8 +547,8 @@
 	 * hardware should work, we must keep this posting read for paranoia.
 	 */
 	if (i != 0)
-		WARN_ON(readl(&gtt_entries[i-1])
-			!= dev_priv->gtt.pte_encode(dev, addr, level));
+		WARN_ON(readl(&gtt_entries[i-1]) !=
+			vm->pte_encode(addr, level));
 
 	/* This next bit makes the above posting read even more important. We
 	 * want to flush the TLBs only after we're certain all the PTE updates
@@ -503,11 +558,11 @@
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
 
-static void gen6_ggtt_clear_range(struct drm_device *dev,
+static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	gen6_gtt_pte_t scratch_pte, __iomem *gtt_base =
 		(gen6_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
 	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
@@ -518,16 +573,14 @@
 		 first_entry, num_entries, max_entries))
 		num_entries = max_entries;
 
-	scratch_pte = dev_priv->gtt.pte_encode(dev,
-					       dev_priv->gtt.scratch_page_dma,
-					       I915_CACHE_LLC);
+	scratch_pte = vm->pte_encode(vm->scratch.addr, I915_CACHE_LLC);
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
 }
 
 
-static void i915_ggtt_insert_entries(struct drm_device *dev,
+static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int pg_start,
 				     enum i915_cache_level cache_level)
@@ -539,7 +592,7 @@
 
 }
 
-static void i915_ggtt_clear_range(struct drm_device *dev,
+static void i915_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries)
 {
@@ -552,10 +605,11 @@
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
 
-	dev_priv->gtt.gtt_insert_entries(dev, obj->pages,
-					 obj->gtt_space->start >> PAGE_SHIFT,
-					 cache_level);
+	dev_priv->gtt.base.insert_entries(&dev_priv->gtt.base, obj->pages,
+					  entry,
+					  cache_level);
 
 	obj->has_global_gtt_mapping = 1;
 }
@@ -564,10 +618,11 @@
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	const unsigned long entry = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT;
 
-	dev_priv->gtt.gtt_clear_range(obj->base.dev,
-				      obj->gtt_space->start >> PAGE_SHIFT,
-				      obj->base.size >> PAGE_SHIFT);
+	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
+				       entry,
+				       obj->base.size >> PAGE_SHIFT);
 
 	obj->has_global_gtt_mapping = 0;
 }
@@ -618,7 +673,8 @@
 	 * aperture.  One page should be enough to keep any prefetching inside
 	 * of the aperture.
 	 */
-	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
 	struct drm_mm_node *entry;
 	struct drm_i915_gem_object *obj;
 	unsigned long hole_start, hole_end;
@@ -626,37 +682,38 @@
 	BUG_ON(mappable_end > end);
 
 	/* Subtract the guard page ... */
-	drm_mm_init(&dev_priv->mm.gtt_space, start, end - start - PAGE_SIZE);
+	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);
 	if (!HAS_LLC(dev))
-		dev_priv->mm.gtt_space.color_adjust = i915_gtt_color_adjust;
+		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;
 
 	/* Mark any preallocated objects as occupied */
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		DRM_DEBUG_KMS("reserving preallocated space: %x + %zx\n",
-			      obj->gtt_offset, obj->base.size);
+		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
+		int ret;
+		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
+			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
 
-		BUG_ON(obj->gtt_space != I915_GTT_RESERVED);
-		obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
-						     obj->gtt_offset,
-						     obj->base.size,
-						     false);
+		WARN_ON(i915_gem_obj_ggtt_bound(obj));
+		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
+		if (ret)
+			DRM_DEBUG_KMS("Reservation failed\n");
 		obj->has_global_gtt_mapping = 1;
+		list_add(&vma->vma_link, &obj->vma_list);
 	}
 
-	dev_priv->gtt.start = start;
-	dev_priv->gtt.total = end - start;
+	dev_priv->gtt.base.start = start;
+	dev_priv->gtt.base.total = end - start;
 
 	/* Clear any non-preallocated blocks */
-	drm_mm_for_each_hole(entry, &dev_priv->mm.gtt_space,
-			     hole_start, hole_end) {
+	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
+		const unsigned long count = (hole_end - hole_start) / PAGE_SIZE;
 		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
 			      hole_start, hole_end);
-		dev_priv->gtt.gtt_clear_range(dev, hole_start / PAGE_SIZE,
-					      (hole_end-hole_start) / PAGE_SIZE);
+		ggtt_vm->clear_range(ggtt_vm, hole_start / PAGE_SIZE, count);
 	}
 
 	/* And finally clear the reserved guard page */
-	dev_priv->gtt.gtt_clear_range(dev, end / PAGE_SIZE - 1, 1);
+	ggtt_vm->clear_range(ggtt_vm, end / PAGE_SIZE - 1, 1);
 }
 
 static bool
@@ -679,7 +736,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long gtt_size, mappable_size;
 
-	gtt_size = dev_priv->gtt.total;
+	gtt_size = dev_priv->gtt.base.total;
 	mappable_size = dev_priv->gtt.mappable_end;
 
 	if (intel_enable_ppgtt(dev) && HAS_ALIASING_PPGTT(dev)) {
@@ -688,7 +745,7 @@
 		if (INTEL_INFO(dev)->gen <= 7) {
 			/* PPGTT pdes are stolen from global gtt ptes, so shrink the
 			 * aperture accordingly when using aliasing ppgtt. */
-			gtt_size -= I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
+			gtt_size -= GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
 		}
 
 		i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
@@ -698,8 +755,8 @@
 			return;
 
 		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
-		drm_mm_takedown(&dev_priv->mm.gtt_space);
-		gtt_size += I915_PPGTT_PD_ENTRIES*PAGE_SIZE;
+		drm_mm_takedown(&dev_priv->gtt.base.mm);
+		gtt_size += GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
 	}
 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
 }
@@ -724,8 +781,8 @@
 #else
 	dma_addr = page_to_phys(page);
 #endif
-	dev_priv->gtt.scratch_page = page;
-	dev_priv->gtt.scratch_page_dma = dma_addr;
+	dev_priv->gtt.base.scratch.page = page;
+	dev_priv->gtt.base.scratch.addr = dma_addr;
 
 	return 0;
 }
@@ -733,11 +790,13 @@
 static void teardown_scratch_page(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	set_pages_wb(dev_priv->gtt.scratch_page, 1);
-	pci_unmap_page(dev->pdev, dev_priv->gtt.scratch_page_dma,
+	struct page *page = dev_priv->gtt.base.scratch.page;
+
+	set_pages_wb(page, 1);
+	pci_unmap_page(dev->pdev, dev_priv->gtt.base.scratch.addr,
 		       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-	put_page(dev_priv->gtt.scratch_page);
-	__free_page(dev_priv->gtt.scratch_page);
+	put_page(page);
+	__free_page(page);
 }
 
 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
@@ -800,17 +859,18 @@
 	if (ret)
 		DRM_ERROR("Scratch setup failed\n");
 
-	dev_priv->gtt.gtt_clear_range = gen6_ggtt_clear_range;
-	dev_priv->gtt.gtt_insert_entries = gen6_ggtt_insert_entries;
+	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
+	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
 
 	return ret;
 }
 
-static void gen6_gmch_remove(struct drm_device *dev)
+static void gen6_gmch_remove(struct i915_address_space *vm)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	iounmap(dev_priv->gtt.gsm);
-	teardown_scratch_page(dev_priv->dev);
+
+	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
+	iounmap(gtt->gsm);
+	teardown_scratch_page(vm->dev);
 }
 
 static int i915_gmch_probe(struct drm_device *dev,
@@ -831,13 +891,13 @@
 	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
-	dev_priv->gtt.gtt_clear_range = i915_ggtt_clear_range;
-	dev_priv->gtt.gtt_insert_entries = i915_ggtt_insert_entries;
+	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
+	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
 
 	return 0;
 }
 
-static void i915_gmch_remove(struct drm_device *dev)
+static void i915_gmch_remove(struct i915_address_space *vm)
 {
 	intel_gmch_remove();
 }
@@ -849,34 +909,35 @@
 	int ret;
 
 	if (INTEL_INFO(dev)->gen <= 5) {
-		dev_priv->gtt.gtt_probe = i915_gmch_probe;
-		dev_priv->gtt.gtt_remove = i915_gmch_remove;
+		gtt->gtt_probe = i915_gmch_probe;
+		gtt->base.cleanup = i915_gmch_remove;
 	} else {
-		dev_priv->gtt.gtt_probe = gen6_gmch_probe;
-		dev_priv->gtt.gtt_remove = gen6_gmch_remove;
-		if (IS_HASWELL(dev)) {
-			dev_priv->gtt.pte_encode = hsw_pte_encode;
-		} else if (IS_VALLEYVIEW(dev)) {
-			dev_priv->gtt.pte_encode = byt_pte_encode;
-		} else {
-			dev_priv->gtt.pte_encode = gen6_pte_encode;
-		}
+		gtt->gtt_probe = gen6_gmch_probe;
+		gtt->base.cleanup = gen6_gmch_remove;
+		if (IS_HASWELL(dev) && dev_priv->ellc_size)
+			gtt->base.pte_encode = iris_pte_encode;
+		else if (IS_HASWELL(dev))
+			gtt->base.pte_encode = hsw_pte_encode;
+		else if (IS_VALLEYVIEW(dev))
+			gtt->base.pte_encode = byt_pte_encode;
+		else if (INTEL_INFO(dev)->gen >= 7)
+			gtt->base.pte_encode = ivb_pte_encode;
+		else
+			gtt->base.pte_encode = snb_pte_encode;
 	}
 
-	ret = dev_priv->gtt.gtt_probe(dev, &dev_priv->gtt.total,
-				     &dev_priv->gtt.stolen_size,
-				     &gtt->mappable_base,
-				     &gtt->mappable_end);
+	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
+			     &gtt->mappable_base, &gtt->mappable_end);
 	if (ret)
 		return ret;
 
+	gtt->base.dev = dev;
+
 	/* GMADR is the PCI mmio aperture into the global GTT. */
 	DRM_INFO("Memory usable by graphics device = %zdM\n",
-		 dev_priv->gtt.total >> 20);
-	DRM_DEBUG_DRIVER("GMADR size = %ldM\n",
-			 dev_priv->gtt.mappable_end >> 20);
-	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n",
-			 dev_priv->gtt.stolen_size >> 20);
+		 gtt->base.total >> 20);
+	DRM_DEBUG_DRIVER("GMADR size = %ldM\n", gtt->mappable_end >> 20);
+	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 982d473..9969d10b 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -45,49 +45,48 @@
 static unsigned long i915_stolen_to_physical(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct pci_dev *pdev = dev_priv->bridge_dev;
+	struct resource *r;
 	u32 base;
 
-	/* On the machines I have tested the Graphics Base of Stolen Memory
-	 * is unreliable, so on those compute the base by subtracting the
-	 * stolen memory from the Top of Low Usable DRAM which is where the
-	 * BIOS places the graphics stolen memory.
+	/* Almost universally we can find the Graphics Base of Stolen Memory
+	 * at offset 0x5c in the igfx configuration space. On a few (desktop)
+	 * machines this is also mirrored in the bridge device at different
+	 * locations, or in the MCHBAR. On gen2, the layout is again slightly
+	 * different with the Graphics Segment immediately following Top of
+	 * Memory (or Top of Usable DRAM). Note it appears that TOUD is only
+	 * reported by 865g, so we just use the top of memory as determined
+	 * by the e820 probe.
 	 *
-	 * On gen2, the layout is slightly different with the Graphics Segment
-	 * immediately following Top of Memory (or Top of Usable DRAM). Note
-	 * it appears that TOUD is only reported by 865g, so we just use the
-	 * top of memory as determined by the e820 probe.
-	 *
-	 * XXX gen2 requires an unavailable symbol and 945gm fails with
-	 * its value of TOLUD.
+	 * XXX However gen2 requires an unavailable symbol.
 	 */
 	base = 0;
-	if (IS_VALLEYVIEW(dev)) {
+	if (INTEL_INFO(dev)->gen >= 3) {
+		/* Read Graphics Base of Stolen Memory directly */
 		pci_read_config_dword(dev->pdev, 0x5c, &base);
 		base &= ~((1<<20) - 1);
-	} else if (INTEL_INFO(dev)->gen >= 6) {
-		/* Read Base Data of Stolen Memory Register (BDSM) directly.
-		 * Note that there is also a MCHBAR miror at 0x1080c0 or
-		 * we could use device 2:0x5c instead.
-		*/
-		pci_read_config_dword(pdev, 0xB0, &base);
-		base &= ~4095; /* lower bits used for locking register */
-	} else if (INTEL_INFO(dev)->gen > 3 || IS_G33(dev)) {
-		/* Read Graphics Base of Stolen Memory directly */
-		pci_read_config_dword(pdev, 0xA4, &base);
+	} else { /* GEN2 */
 #if 0
-	} else if (IS_GEN3(dev)) {
-		u8 val;
-		/* Stolen is immediately below Top of Low Usable DRAM */
-		pci_read_config_byte(pdev, 0x9c, &val);
-		base = val >> 3 << 27;
-		base -= dev_priv->mm.gtt->stolen_size;
-	} else {
 		/* Stolen is immediately above Top of Memory */
 		base = max_low_pfn_mapped << PAGE_SHIFT;
 #endif
 	}
 
+	if (base == 0)
+		return 0;
+
+	/* Verify that nothing else uses this physical address. Stolen
+	 * memory should be reserved by the BIOS and hidden from the
+	 * kernel. So if the region is already marked as busy, something
+	 * is seriously wrong.
+	 */
+	r = devm_request_mem_region(dev->dev, base, dev_priv->gtt.stolen_size,
+				    "Graphics Stolen Memory");
+	if (r == NULL) {
+		DRM_ERROR("conflict detected with stolen region: [0x%08x - 0x%08x]\n",
+			  base, base + (uint32_t)dev_priv->gtt.stolen_size);
+		base = 0;
+	}
+
 	return base;
 }
 
@@ -95,32 +94,37 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_mm_node *compressed_fb, *uninitialized_var(compressed_llb);
+	int ret;
+
+	compressed_fb = kzalloc(sizeof(*compressed_fb), GFP_KERNEL);
+	if (!compressed_fb)
+		goto err_llb;
 
 	/* Try to over-allocate to reduce reallocations and fragmentation */
-	compressed_fb = drm_mm_search_free(&dev_priv->mm.stolen,
-					   size <<= 1, 4096, 0);
-	if (!compressed_fb)
-		compressed_fb = drm_mm_search_free(&dev_priv->mm.stolen,
-						   size >>= 1, 4096, 0);
-	if (compressed_fb)
-		compressed_fb = drm_mm_get_block(compressed_fb, size, 4096);
-	if (!compressed_fb)
-		goto err;
+	ret = drm_mm_insert_node(&dev_priv->mm.stolen, compressed_fb,
+				 size <<= 1, 4096, DRM_MM_SEARCH_DEFAULT);
+	if (ret)
+		ret = drm_mm_insert_node(&dev_priv->mm.stolen, compressed_fb,
+					 size >>= 1, 4096,
+					 DRM_MM_SEARCH_DEFAULT);
+	if (ret)
+		goto err_llb;
 
 	if (HAS_PCH_SPLIT(dev))
 		I915_WRITE(ILK_DPFC_CB_BASE, compressed_fb->start);
 	else if (IS_GM45(dev)) {
 		I915_WRITE(DPFC_CB_BASE, compressed_fb->start);
 	} else {
-		compressed_llb = drm_mm_search_free(&dev_priv->mm.stolen,
-						    4096, 4096, 0);
-		if (compressed_llb)
-			compressed_llb = drm_mm_get_block(compressed_llb,
-							  4096, 4096);
+		compressed_llb = kzalloc(sizeof(*compressed_llb), GFP_KERNEL);
 		if (!compressed_llb)
 			goto err_fb;
 
-		dev_priv->compressed_llb = compressed_llb;
+		ret = drm_mm_insert_node(&dev_priv->mm.stolen, compressed_llb,
+					 4096, 4096, DRM_MM_SEARCH_DEFAULT);
+		if (ret)
+			goto err_fb;
+
+		dev_priv->fbc.compressed_llb = compressed_llb;
 
 		I915_WRITE(FBC_CFB_BASE,
 			   dev_priv->mm.stolen_base + compressed_fb->start);
@@ -128,8 +132,8 @@
 			   dev_priv->mm.stolen_base + compressed_llb->start);
 	}
 
-	dev_priv->compressed_fb = compressed_fb;
-	dev_priv->cfb_size = size;
+	dev_priv->fbc.compressed_fb = compressed_fb;
+	dev_priv->fbc.size = size;
 
 	DRM_DEBUG_KMS("reserved %d bytes of contiguous stolen space for FBC\n",
 		      size);
@@ -137,8 +141,10 @@
 	return 0;
 
 err_fb:
-	drm_mm_put_block(compressed_fb);
-err:
+	kfree(compressed_llb);
+	drm_mm_remove_node(compressed_fb);
+err_llb:
+	kfree(compressed_fb);
 	pr_info_once("drm: not enough stolen space for compressed buffer (need %d more bytes), disabling. Hint: you may be able to increase stolen memory size in the BIOS to avoid this.\n", size);
 	return -ENOSPC;
 }
@@ -150,7 +156,7 @@
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return -ENODEV;
 
-	if (size < dev_priv->cfb_size)
+	if (size < dev_priv->fbc.size)
 		return 0;
 
 	/* Release any current block */
@@ -163,16 +169,20 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (dev_priv->cfb_size == 0)
+	if (dev_priv->fbc.size == 0)
 		return;
 
-	if (dev_priv->compressed_fb)
-		drm_mm_put_block(dev_priv->compressed_fb);
+	if (dev_priv->fbc.compressed_fb) {
+		drm_mm_remove_node(dev_priv->fbc.compressed_fb);
+		kfree(dev_priv->fbc.compressed_fb);
+	}
 
-	if (dev_priv->compressed_llb)
-		drm_mm_put_block(dev_priv->compressed_llb);
+	if (dev_priv->fbc.compressed_llb) {
+		drm_mm_remove_node(dev_priv->fbc.compressed_llb);
+		kfree(dev_priv->fbc.compressed_llb);
+	}
 
-	dev_priv->cfb_size = 0;
+	dev_priv->fbc.size = 0;
 }
 
 void i915_gem_cleanup_stolen(struct drm_device *dev)
@@ -201,6 +211,9 @@
 	if (IS_VALLEYVIEW(dev))
 		bios_reserved = 1024*1024; /* top 1M on VLV/BYT */
 
+	if (WARN_ON(bios_reserved > dev_priv->gtt.stolen_size))
+		return 0;
+
 	/* Basic memrange allocator for stolen space */
 	drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_size -
 		    bios_reserved);
@@ -271,9 +284,7 @@
 	if (obj == NULL)
 		return NULL;
 
-	if (drm_gem_private_object_init(dev, &obj->base, stolen->size))
-		goto cleanup;
-
+	drm_gem_private_object_init(dev, &obj->base, stolen->size);
 	i915_gem_object_init(obj, &i915_gem_object_stolen_ops);
 
 	obj->pages = i915_pages_create_for_stolen(dev,
@@ -285,9 +296,8 @@
 	i915_gem_object_pin_pages(obj);
 	obj->stolen = stolen;
 
-	obj->base.write_domain = I915_GEM_DOMAIN_GTT;
-	obj->base.read_domains = I915_GEM_DOMAIN_GTT;
-	obj->cache_level = I915_CACHE_NONE;
+	obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
+	obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE;
 
 	return obj;
 
@@ -302,6 +312,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	struct drm_mm_node *stolen;
+	int ret;
 
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return NULL;
@@ -310,17 +321,23 @@
 	if (size == 0)
 		return NULL;
 
-	stolen = drm_mm_search_free(&dev_priv->mm.stolen, size, 4096, 0);
-	if (stolen)
-		stolen = drm_mm_get_block(stolen, size, 4096);
-	if (stolen == NULL)
+	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+	if (!stolen)
 		return NULL;
 
+	ret = drm_mm_insert_node(&dev_priv->mm.stolen, stolen, size,
+				 4096, DRM_MM_SEARCH_DEFAULT);
+	if (ret) {
+		kfree(stolen);
+		return NULL;
+	}
+
 	obj = _i915_gem_object_create_stolen(dev, stolen);
 	if (obj)
 		return obj;
 
-	drm_mm_put_block(stolen);
+	drm_mm_remove_node(stolen);
+	kfree(stolen);
 	return NULL;
 }
 
@@ -331,8 +348,11 @@
 					       u32 size)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *ggtt = &dev_priv->gtt.base;
 	struct drm_i915_gem_object *obj;
 	struct drm_mm_node *stolen;
+	struct i915_vma *vma;
+	int ret;
 
 	if (!drm_mm_initialized(&dev_priv->mm.stolen))
 		return NULL;
@@ -347,56 +367,74 @@
 	if (WARN_ON(size == 0))
 		return NULL;
 
-	stolen = drm_mm_create_block(&dev_priv->mm.stolen,
-				     stolen_offset, size,
-				     false);
-	if (stolen == NULL) {
+	stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+	if (!stolen)
+		return NULL;
+
+	stolen->start = stolen_offset;
+	stolen->size = size;
+	ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen);
+	if (ret) {
 		DRM_DEBUG_KMS("failed to allocate stolen space\n");
+		kfree(stolen);
 		return NULL;
 	}
 
 	obj = _i915_gem_object_create_stolen(dev, stolen);
 	if (obj == NULL) {
 		DRM_DEBUG_KMS("failed to allocate stolen object\n");
-		drm_mm_put_block(stolen);
+		drm_mm_remove_node(stolen);
+		kfree(stolen);
 		return NULL;
 	}
 
 	/* Some objects just need physical mem from stolen space */
-	if (gtt_offset == -1)
+	if (gtt_offset == I915_GTT_OFFSET_NONE)
 		return obj;
 
+	vma = i915_gem_vma_create(obj, ggtt);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_out;
+	}
+
 	/* To simplify the initialisation sequence between KMS and GTT,
 	 * we allow construction of the stolen object prior to
 	 * setting up the GTT space. The actual reservation will occur
 	 * later.
 	 */
-	if (drm_mm_initialized(&dev_priv->mm.gtt_space)) {
-		obj->gtt_space = drm_mm_create_block(&dev_priv->mm.gtt_space,
-						     gtt_offset, size,
-						     false);
-		if (obj->gtt_space == NULL) {
+	vma->node.start = gtt_offset;
+	vma->node.size = size;
+	if (drm_mm_initialized(&ggtt->mm)) {
+		ret = drm_mm_reserve_node(&ggtt->mm, &vma->node);
+		if (ret) {
 			DRM_DEBUG_KMS("failed to allocate stolen GTT space\n");
-			drm_gem_object_unreference(&obj->base);
-			return NULL;
+			goto err_vma;
 		}
-	} else
-		obj->gtt_space = I915_GTT_RESERVED;
+	}
 
-	obj->gtt_offset = gtt_offset;
 	obj->has_global_gtt_mapping = 1;
 
 	list_add_tail(&obj->global_list, &dev_priv->mm.bound_list);
-	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
+	list_add_tail(&vma->mm_list, &ggtt->inactive_list);
 
 	return obj;
+
+err_vma:
+	i915_gem_vma_destroy(vma);
+err_out:
+	drm_mm_remove_node(stolen);
+	kfree(stolen);
+	drm_gem_object_unreference(&obj->base);
+	return NULL;
 }
 
 void
 i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
 {
 	if (obj->stolen) {
-		drm_mm_put_block(obj->stolen);
+		drm_mm_remove_node(obj->stolen);
+		kfree(obj->stolen);
 		obj->stolen = NULL;
 	}
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 537545b..032e9ef 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -268,18 +268,18 @@
 		return true;
 
 	if (INTEL_INFO(obj->base.dev)->gen == 3) {
-		if (obj->gtt_offset & ~I915_FENCE_START_MASK)
+		if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK)
 			return false;
 	} else {
-		if (obj->gtt_offset & ~I830_FENCE_START_MASK)
+		if (i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK)
 			return false;
 	}
 
 	size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode);
-	if (obj->gtt_space->size != size)
+	if (i915_gem_obj_ggtt_size(obj) != size)
 		return false;
 
-	if (obj->gtt_offset & (size - 1))
+	if (i915_gem_obj_ggtt_offset(obj) & (size - 1))
 		return false;
 
 	return true;
@@ -359,18 +359,19 @@
 		 */
 
 		obj->map_and_fenceable =
-			obj->gtt_space == NULL ||
-			(obj->gtt_offset + obj->base.size <= dev_priv->gtt.mappable_end &&
+			!i915_gem_obj_ggtt_bound(obj) ||
+			(i915_gem_obj_ggtt_offset(obj) +
+			 obj->base.size <= dev_priv->gtt.mappable_end &&
 			 i915_gem_object_fence_ok(obj, args->tiling_mode));
 
 		/* Rebind if we need a change of alignment */
 		if (!obj->map_and_fenceable) {
-			u32 unfenced_alignment =
+			u32 unfenced_align =
 				i915_gem_get_gtt_alignment(dev, obj->base.size,
 							    args->tiling_mode,
 							    false);
-			if (obj->gtt_offset & (unfenced_alignment - 1))
-				ret = i915_gem_object_unbind(obj);
+			if (i915_gem_obj_ggtt_offset(obj) & (unfenced_align - 1))
+				ret = i915_gem_object_ggtt_unbind(obj);
 		}
 
 		if (ret == 0) {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
new file mode 100644
index 0000000..558e568
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Eric Anholt <eric@anholt.net>
+ *    Keith Packard <keithp@keithp.com>
+ *    Mika Kuoppala <mika.kuoppala@intel.com>
+ *
+ */
+
+#include <generated/utsrelease.h>
+#include "i915_drv.h"
+
+static const char *yesno(int v)
+{
+	return v ? "yes" : "no";
+}
+
+static const char *ring_str(int ring)
+{
+	switch (ring) {
+	case RCS: return "render";
+	case VCS: return "bsd";
+	case BCS: return "blt";
+	case VECS: return "vebox";
+	default: return "";
+	}
+}
+
+static const char *pin_flag(int pinned)
+{
+	if (pinned > 0)
+		return " P";
+	else if (pinned < 0)
+		return " p";
+	else
+		return "";
+}
+
+static const char *tiling_flag(int tiling)
+{
+	switch (tiling) {
+	default:
+	case I915_TILING_NONE: return "";
+	case I915_TILING_X: return " X";
+	case I915_TILING_Y: return " Y";
+	}
+}
+
+static const char *dirty_flag(int dirty)
+{
+	return dirty ? " dirty" : "";
+}
+
+static const char *purgeable_flag(int purgeable)
+{
+	return purgeable ? " purgeable" : "";
+}
+
+static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
+{
+
+	if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
+		e->err = -ENOSPC;
+		return false;
+	}
+
+	if (e->bytes == e->size - 1 || e->err)
+		return false;
+
+	return true;
+}
+
+static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
+			      unsigned len)
+{
+	if (e->pos + len <= e->start) {
+		e->pos += len;
+		return false;
+	}
+
+	/* First vsnprintf needs to fit in its entirety for memmove */
+	if (len >= e->size) {
+		e->err = -EIO;
+		return false;
+	}
+
+	return true;
+}
+
+static void __i915_error_advance(struct drm_i915_error_state_buf *e,
+				 unsigned len)
+{
+	/* If this is first printf in this window, adjust it so that
+	 * start position matches start of the buffer
+	 */
+
+	if (e->pos < e->start) {
+		const size_t off = e->start - e->pos;
+
+		/* Should not happen but be paranoid */
+		if (off > len || e->bytes) {
+			e->err = -EIO;
+			return;
+		}
+
+		memmove(e->buf, e->buf + off, len - off);
+		e->bytes = len - off;
+		e->pos = e->start;
+		return;
+	}
+
+	e->bytes += len;
+	e->pos += len;
+}
+
+static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
+			       const char *f, va_list args)
+{
+	unsigned len;
+
+	if (!__i915_error_ok(e))
+		return;
+
+	/* Seek the first printf which is hits start position */
+	if (e->pos < e->start) {
+		len = vsnprintf(NULL, 0, f, args);
+		if (!__i915_error_seek(e, len))
+			return;
+	}
+
+	len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
+	if (len >= e->size - e->bytes)
+		len = e->size - e->bytes - 1;
+
+	__i915_error_advance(e, len);
+}
+
+static void i915_error_puts(struct drm_i915_error_state_buf *e,
+			    const char *str)
+{
+	unsigned len;
+
+	if (!__i915_error_ok(e))
+		return;
+
+	len = strlen(str);
+
+	/* Seek the first printf which is hits start position */
+	if (e->pos < e->start) {
+		if (!__i915_error_seek(e, len))
+			return;
+	}
+
+	if (len >= e->size - e->bytes)
+		len = e->size - e->bytes - 1;
+	memcpy(e->buf + e->bytes, str, len);
+
+	__i915_error_advance(e, len);
+}
+
+#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
+#define err_puts(e, s) i915_error_puts(e, s)
+
+static void print_error_buffers(struct drm_i915_error_state_buf *m,
+				const char *name,
+				struct drm_i915_error_buffer *err,
+				int count)
+{
+	err_printf(m, "%s [%d]:\n", name, count);
+
+	while (count--) {
+		err_printf(m, "  %08x %8u %02x %02x %x %x",
+			   err->gtt_offset,
+			   err->size,
+			   err->read_domains,
+			   err->write_domain,
+			   err->rseqno, err->wseqno);
+		err_puts(m, pin_flag(err->pinned));
+		err_puts(m, tiling_flag(err->tiling));
+		err_puts(m, dirty_flag(err->dirty));
+		err_puts(m, purgeable_flag(err->purgeable));
+		err_puts(m, err->ring != -1 ? " " : "");
+		err_puts(m, ring_str(err->ring));
+		err_puts(m, i915_cache_level_str(err->cache_level));
+
+		if (err->name)
+			err_printf(m, " (name: %d)", err->name);
+		if (err->fence_reg != I915_FENCE_REG_NONE)
+			err_printf(m, " (fence: %d)", err->fence_reg);
+
+		err_puts(m, "\n");
+		err++;
+	}
+}
+
+static void i915_ring_error_state(struct drm_i915_error_state_buf *m,
+				  struct drm_device *dev,
+				  struct drm_i915_error_state *error,
+				  unsigned ring)
+{
+	BUG_ON(ring >= I915_NUM_RINGS); /* shut up confused gcc */
+	err_printf(m, "%s command stream:\n", ring_str(ring));
+	err_printf(m, "  HEAD: 0x%08x\n", error->head[ring]);
+	err_printf(m, "  TAIL: 0x%08x\n", error->tail[ring]);
+	err_printf(m, "  CTL: 0x%08x\n", error->ctl[ring]);
+	err_printf(m, "  ACTHD: 0x%08x\n", error->acthd[ring]);
+	err_printf(m, "  IPEIR: 0x%08x\n", error->ipeir[ring]);
+	err_printf(m, "  IPEHR: 0x%08x\n", error->ipehr[ring]);
+	err_printf(m, "  INSTDONE: 0x%08x\n", error->instdone[ring]);
+	if (ring == RCS && INTEL_INFO(dev)->gen >= 4)
+		err_printf(m, "  BBADDR: 0x%08llx\n", error->bbaddr);
+
+	if (INTEL_INFO(dev)->gen >= 4)
+		err_printf(m, "  INSTPS: 0x%08x\n", error->instps[ring]);
+	err_printf(m, "  INSTPM: 0x%08x\n", error->instpm[ring]);
+	err_printf(m, "  FADDR: 0x%08x\n", error->faddr[ring]);
+	if (INTEL_INFO(dev)->gen >= 6) {
+		err_printf(m, "  RC PSMI: 0x%08x\n", error->rc_psmi[ring]);
+		err_printf(m, "  FAULT_REG: 0x%08x\n", error->fault_reg[ring]);
+		err_printf(m, "  SYNC_0: 0x%08x [last synced 0x%08x]\n",
+			   error->semaphore_mboxes[ring][0],
+			   error->semaphore_seqno[ring][0]);
+		err_printf(m, "  SYNC_1: 0x%08x [last synced 0x%08x]\n",
+			   error->semaphore_mboxes[ring][1],
+			   error->semaphore_seqno[ring][1]);
+		if (HAS_VEBOX(dev)) {
+			err_printf(m, "  SYNC_2: 0x%08x [last synced 0x%08x]\n",
+				   error->semaphore_mboxes[ring][2],
+				   error->semaphore_seqno[ring][2]);
+		}
+	}
+	err_printf(m, "  seqno: 0x%08x\n", error->seqno[ring]);
+	err_printf(m, "  waiting: %s\n", yesno(error->waiting[ring]));
+	err_printf(m, "  ring->head: 0x%08x\n", error->cpu_ring_head[ring]);
+	err_printf(m, "  ring->tail: 0x%08x\n", error->cpu_ring_tail[ring]);
+}
+
+void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
+{
+	va_list args;
+
+	va_start(args, f);
+	i915_error_vprintf(e, f, args);
+	va_end(args);
+}
+
+int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
+			    const struct i915_error_state_file_priv *error_priv)
+{
+	struct drm_device *dev = error_priv->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	struct drm_i915_error_state *error = error_priv->error;
+	struct intel_ring_buffer *ring;
+	int i, j, page, offset, elt;
+
+	if (!error) {
+		err_printf(m, "no error state collected\n");
+		goto out;
+	}
+
+	err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec,
+		   error->time.tv_usec);
+	err_printf(m, "Kernel: " UTS_RELEASE "\n");
+	err_printf(m, "PCI ID: 0x%04x\n", dev->pci_device);
+	err_printf(m, "EIR: 0x%08x\n", error->eir);
+	err_printf(m, "IER: 0x%08x\n", error->ier);
+	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
+	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
+	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
+	err_printf(m, "CCID: 0x%08x\n", error->ccid);
+
+	for (i = 0; i < dev_priv->num_fence_regs; i++)
+		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);
+
+	for (i = 0; i < ARRAY_SIZE(error->extra_instdone); i++)
+		err_printf(m, "  INSTDONE_%d: 0x%08x\n", i,
+			   error->extra_instdone[i]);
+
+	if (INTEL_INFO(dev)->gen >= 6) {
+		err_printf(m, "ERROR: 0x%08x\n", error->error);
+		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
+	}
+
+	if (INTEL_INFO(dev)->gen == 7)
+		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);
+
+	for_each_ring(ring, dev_priv, i)
+		i915_ring_error_state(m, dev, error, i);
+
+	if (error->active_bo)
+		print_error_buffers(m, "Active",
+				    error->active_bo[0],
+				    error->active_bo_count[0]);
+
+	if (error->pinned_bo)
+		print_error_buffers(m, "Pinned",
+				    error->pinned_bo[0],
+				    error->pinned_bo_count[0]);
+
+	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
+		struct drm_i915_error_object *obj;
+
+		if ((obj = error->ring[i].batchbuffer)) {
+			err_printf(m, "%s --- gtt_offset = 0x%08x\n",
+				   dev_priv->ring[i].name,
+				   obj->gtt_offset);
+			offset = 0;
+			for (page = 0; page < obj->page_count; page++) {
+				for (elt = 0; elt < PAGE_SIZE/4; elt++) {
+					err_printf(m, "%08x :  %08x\n", offset,
+						   obj->pages[page][elt]);
+					offset += 4;
+				}
+			}
+		}
+
+		if (error->ring[i].num_requests) {
+			err_printf(m, "%s --- %d requests\n",
+				   dev_priv->ring[i].name,
+				   error->ring[i].num_requests);
+			for (j = 0; j < error->ring[i].num_requests; j++) {
+				err_printf(m, "  seqno 0x%08x, emitted %ld, tail 0x%08x\n",
+					   error->ring[i].requests[j].seqno,
+					   error->ring[i].requests[j].jiffies,
+					   error->ring[i].requests[j].tail);
+			}
+		}
+
+		if ((obj = error->ring[i].ringbuffer)) {
+			err_printf(m, "%s --- ringbuffer = 0x%08x\n",
+				   dev_priv->ring[i].name,
+				   obj->gtt_offset);
+			offset = 0;
+			for (page = 0; page < obj->page_count; page++) {
+				for (elt = 0; elt < PAGE_SIZE/4; elt++) {
+					err_printf(m, "%08x :  %08x\n",
+						   offset,
+						   obj->pages[page][elt]);
+					offset += 4;
+				}
+			}
+		}
+
+		obj = error->ring[i].ctx;
+		if (obj) {
+			err_printf(m, "%s --- HW Context = 0x%08x\n",
+				   dev_priv->ring[i].name,
+				   obj->gtt_offset);
+			offset = 0;
+			for (elt = 0; elt < PAGE_SIZE/16; elt += 4) {
+				err_printf(m, "[%04x] %08x %08x %08x %08x\n",
+					   offset,
+					   obj->pages[0][elt],
+					   obj->pages[0][elt+1],
+					   obj->pages[0][elt+2],
+					   obj->pages[0][elt+3]);
+					offset += 16;
+			}
+		}
+	}
+
+	if (error->overlay)
+		intel_overlay_print_error_state(m, error->overlay);
+
+	if (error->display)
+		intel_display_print_error_state(m, dev, error->display);
+
+out:
+	if (m->bytes == 0 && m->err)
+		return m->err;
+
+	return 0;
+}
+
+int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
+			      size_t count, loff_t pos)
+{
+	memset(ebuf, 0, sizeof(*ebuf));
+
+	/* We need to have enough room to store any i915_error_state printf
+	 * so that we can move it to start position.
+	 */
+	ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
+	ebuf->buf = kmalloc(ebuf->size,
+				GFP_TEMPORARY | __GFP_NORETRY | __GFP_NOWARN);
+
+	if (ebuf->buf == NULL) {
+		ebuf->size = PAGE_SIZE;
+		ebuf->buf = kmalloc(ebuf->size, GFP_TEMPORARY);
+	}
+
+	if (ebuf->buf == NULL) {
+		ebuf->size = 128;
+		ebuf->buf = kmalloc(ebuf->size, GFP_TEMPORARY);
+	}
+
+	if (ebuf->buf == NULL)
+		return -ENOMEM;
+
+	ebuf->start = pos;
+
+	return 0;
+}
+
+static void i915_error_object_free(struct drm_i915_error_object *obj)
+{
+	int page;
+
+	if (obj == NULL)
+		return;
+
+	for (page = 0; page < obj->page_count; page++)
+		kfree(obj->pages[page]);
+
+	kfree(obj);
+}
+
+static void i915_error_state_free(struct kref *error_ref)
+{
+	struct drm_i915_error_state *error = container_of(error_ref,
+							  typeof(*error), ref);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
+		i915_error_object_free(error->ring[i].batchbuffer);
+		i915_error_object_free(error->ring[i].ringbuffer);
+		i915_error_object_free(error->ring[i].ctx);
+		kfree(error->ring[i].requests);
+	}
+
+	kfree(error->active_bo);
+	kfree(error->overlay);
+	kfree(error->display);
+	kfree(error);
+}
+
+static struct drm_i915_error_object *
+i915_error_object_create_sized(struct drm_i915_private *dev_priv,
+			       struct drm_i915_gem_object *src,
+			       const int num_pages)
+{
+	struct drm_i915_error_object *dst;
+	int i;
+	u32 reloc_offset;
+
+	if (src == NULL || src->pages == NULL)
+		return NULL;
+
+	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
+	if (dst == NULL)
+		return NULL;
+
+	reloc_offset = dst->gtt_offset = i915_gem_obj_ggtt_offset(src);
+	for (i = 0; i < num_pages; i++) {
+		unsigned long flags;
+		void *d;
+
+		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
+		if (d == NULL)
+			goto unwind;
+
+		local_irq_save(flags);
+		if (reloc_offset < dev_priv->gtt.mappable_end &&
+		    src->has_global_gtt_mapping) {
+			void __iomem *s;
+
+			/* Simply ignore tiling or any overlapping fence.
+			 * It's part of the error state, and this hopefully
+			 * captures what the GPU read.
+			 */
+
+			s = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
+						     reloc_offset);
+			memcpy_fromio(d, s, PAGE_SIZE);
+			io_mapping_unmap_atomic(s);
+		} else if (src->stolen) {
+			unsigned long offset;
+
+			offset = dev_priv->mm.stolen_base;
+			offset += src->stolen->start;
+			offset += i << PAGE_SHIFT;
+
+			memcpy_fromio(d, (void __iomem *) offset, PAGE_SIZE);
+		} else {
+			struct page *page;
+			void *s;
+
+			page = i915_gem_object_get_page(src, i);
+
+			drm_clflush_pages(&page, 1);
+
+			s = kmap_atomic(page);
+			memcpy(d, s, PAGE_SIZE);
+			kunmap_atomic(s);
+
+			drm_clflush_pages(&page, 1);
+		}
+		local_irq_restore(flags);
+
+		dst->pages[i] = d;
+
+		reloc_offset += PAGE_SIZE;
+	}
+	dst->page_count = num_pages;
+
+	return dst;
+
+unwind:
+	while (i--)
+		kfree(dst->pages[i]);
+	kfree(dst);
+	return NULL;
+}
+#define i915_error_object_create(dev_priv, src) \
+	i915_error_object_create_sized((dev_priv), (src), \
+				       (src)->base.size>>PAGE_SHIFT)
+
+static void capture_bo(struct drm_i915_error_buffer *err,
+		       struct drm_i915_gem_object *obj)
+{
+	err->size = obj->base.size;
+	err->name = obj->base.name;
+	err->rseqno = obj->last_read_seqno;
+	err->wseqno = obj->last_write_seqno;
+	err->gtt_offset = i915_gem_obj_ggtt_offset(obj);
+	err->read_domains = obj->base.read_domains;
+	err->write_domain = obj->base.write_domain;
+	err->fence_reg = obj->fence_reg;
+	err->pinned = 0;
+	if (obj->pin_count > 0)
+		err->pinned = 1;
+	if (obj->user_pin_count > 0)
+		err->pinned = -1;
+	err->tiling = obj->tiling_mode;
+	err->dirty = obj->dirty;
+	err->purgeable = obj->madv != I915_MADV_WILLNEED;
+	err->ring = obj->ring ? obj->ring->id : -1;
+	err->cache_level = obj->cache_level;
+}
+
+static u32 capture_active_bo(struct drm_i915_error_buffer *err,
+			     int count, struct list_head *head)
+{
+	struct i915_vma *vma;
+	int i = 0;
+
+	list_for_each_entry(vma, head, mm_list) {
+		capture_bo(err++, vma->obj);
+		if (++i == count)
+			break;
+	}
+
+	return i;
+}
+
+static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
+			     int count, struct list_head *head)
+{
+	struct drm_i915_gem_object *obj;
+	int i = 0;
+
+	list_for_each_entry(obj, head, global_list) {
+		if (obj->pin_count == 0)
+			continue;
+
+		capture_bo(err++, obj);
+		if (++i == count)
+			break;
+	}
+
+	return i;
+}
+
+static void i915_gem_record_fences(struct drm_device *dev,
+				   struct drm_i915_error_state *error)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int i;
+
+	/* Fences */
+	switch (INTEL_INFO(dev)->gen) {
+	case 7:
+	case 6:
+		for (i = 0; i < dev_priv->num_fence_regs; i++)
+			error->fence[i] = I915_READ64(FENCE_REG_SANDYBRIDGE_0 + (i * 8));
+		break;
+	case 5:
+	case 4:
+		for (i = 0; i < 16; i++)
+			error->fence[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
+		break;
+	case 3:
+		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
+			for (i = 0; i < 8; i++)
+				error->fence[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
+	case 2:
+		for (i = 0; i < 8; i++)
+			error->fence[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
+		break;
+
+	default:
+		BUG();
+	}
+}
+
+static struct drm_i915_error_object *
+i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
+			     struct intel_ring_buffer *ring)
+{
+	struct i915_address_space *vm;
+	struct i915_vma *vma;
+	struct drm_i915_gem_object *obj;
+	u32 seqno;
+
+	if (!ring->get_seqno)
+		return NULL;
+
+	if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
+		u32 acthd = I915_READ(ACTHD);
+
+		if (WARN_ON(ring->id != RCS))
+			return NULL;
+
+		obj = ring->private;
+		if (acthd >= i915_gem_obj_ggtt_offset(obj) &&
+		    acthd < i915_gem_obj_ggtt_offset(obj) + obj->base.size)
+			return i915_error_object_create(dev_priv, obj);
+	}
+
+	seqno = ring->get_seqno(ring, false);
+	list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
+		list_for_each_entry(vma, &vm->active_list, mm_list) {
+			obj = vma->obj;
+			if (obj->ring != ring)
+				continue;
+
+			if (i915_seqno_passed(seqno, obj->last_read_seqno))
+				continue;
+
+			if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0)
+				continue;
+
+			/* We need to copy these to an anonymous buffer as the simplest
+			 * method to avoid being overwritten by userspace.
+			 */
+			return i915_error_object_create(dev_priv, obj);
+		}
+	}
+
+	return NULL;
+}
+
+static void i915_record_ring_state(struct drm_device *dev,
+				   struct drm_i915_error_state *error,
+				   struct intel_ring_buffer *ring)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (INTEL_INFO(dev)->gen >= 6) {
+		error->rc_psmi[ring->id] = I915_READ(ring->mmio_base + 0x50);
+		error->fault_reg[ring->id] = I915_READ(RING_FAULT_REG(ring));
+		error->semaphore_mboxes[ring->id][0]
+			= I915_READ(RING_SYNC_0(ring->mmio_base));
+		error->semaphore_mboxes[ring->id][1]
+			= I915_READ(RING_SYNC_1(ring->mmio_base));
+		error->semaphore_seqno[ring->id][0] = ring->sync_seqno[0];
+		error->semaphore_seqno[ring->id][1] = ring->sync_seqno[1];
+	}
+
+	if (HAS_VEBOX(dev)) {
+		error->semaphore_mboxes[ring->id][2] =
+			I915_READ(RING_SYNC_2(ring->mmio_base));
+		error->semaphore_seqno[ring->id][2] = ring->sync_seqno[2];
+	}
+
+	if (INTEL_INFO(dev)->gen >= 4) {
+		error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base));
+		error->ipeir[ring->id] = I915_READ(RING_IPEIR(ring->mmio_base));
+		error->ipehr[ring->id] = I915_READ(RING_IPEHR(ring->mmio_base));
+		error->instdone[ring->id] = I915_READ(RING_INSTDONE(ring->mmio_base));
+		error->instps[ring->id] = I915_READ(RING_INSTPS(ring->mmio_base));
+		if (ring->id == RCS)
+			error->bbaddr = I915_READ64(BB_ADDR);
+	} else {
+		error->faddr[ring->id] = I915_READ(DMA_FADD_I8XX);
+		error->ipeir[ring->id] = I915_READ(IPEIR);
+		error->ipehr[ring->id] = I915_READ(IPEHR);
+		error->instdone[ring->id] = I915_READ(INSTDONE);
+	}
+
+	error->waiting[ring->id] = waitqueue_active(&ring->irq_queue);
+	error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
+	error->seqno[ring->id] = ring->get_seqno(ring, false);
+	error->acthd[ring->id] = intel_ring_get_active_head(ring);
+	error->head[ring->id] = I915_READ_HEAD(ring);
+	error->tail[ring->id] = I915_READ_TAIL(ring);
+	error->ctl[ring->id] = I915_READ_CTL(ring);
+
+	error->cpu_ring_head[ring->id] = ring->head;
+	error->cpu_ring_tail[ring->id] = ring->tail;
+}
+
+
+static void i915_gem_record_active_context(struct intel_ring_buffer *ring,
+					   struct drm_i915_error_state *error,
+					   struct drm_i915_error_ring *ering)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_gem_object *obj;
+
+	/* Currently render ring is the only HW context user */
+	if (ring->id != RCS || !error->ccid)
+		return;
+
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) {
+			ering->ctx = i915_error_object_create_sized(dev_priv,
+								    obj, 1);
+			break;
+		}
+	}
+}
+
+static void i915_gem_record_rings(struct drm_device *dev,
+				  struct drm_i915_error_state *error)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	struct drm_i915_gem_request *request;
+	int i, count;
+
+	for_each_ring(ring, dev_priv, i) {
+		i915_record_ring_state(dev, error, ring);
+
+		error->ring[i].batchbuffer =
+			i915_error_first_batchbuffer(dev_priv, ring);
+
+		error->ring[i].ringbuffer =
+			i915_error_object_create(dev_priv, ring->obj);
+
+
+		i915_gem_record_active_context(ring, error, &error->ring[i]);
+
+		count = 0;
+		list_for_each_entry(request, &ring->request_list, list)
+			count++;
+
+		error->ring[i].num_requests = count;
+		error->ring[i].requests =
+			kmalloc(count*sizeof(struct drm_i915_error_request),
+				GFP_ATOMIC);
+		if (error->ring[i].requests == NULL) {
+			error->ring[i].num_requests = 0;
+			continue;
+		}
+
+		count = 0;
+		list_for_each_entry(request, &ring->request_list, list) {
+			struct drm_i915_error_request *erq;
+
+			erq = &error->ring[i].requests[count++];
+			erq->seqno = request->seqno;
+			erq->jiffies = request->emitted_jiffies;
+			erq->tail = request->tail;
+		}
+	}
+}
+
+/* FIXME: Since pin count/bound list is global, we duplicate what we capture per
+ * VM.
+ */
+static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
+				struct drm_i915_error_state *error,
+				struct i915_address_space *vm,
+				const int ndx)
+{
+	struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int i;
+
+	i = 0;
+	list_for_each_entry(vma, &vm->active_list, mm_list)
+		i++;
+	error->active_bo_count[ndx] = i;
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
+		if (obj->pin_count)
+			i++;
+	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
+
+	if (i) {
+		active_bo = kmalloc(sizeof(*active_bo)*i, GFP_ATOMIC);
+		if (active_bo)
+			pinned_bo = active_bo + error->active_bo_count[ndx];
+	}
+
+	if (active_bo)
+		error->active_bo_count[ndx] =
+			capture_active_bo(active_bo,
+					  error->active_bo_count[ndx],
+					  &vm->active_list);
+
+	if (pinned_bo)
+		error->pinned_bo_count[ndx] =
+			capture_pinned_bo(pinned_bo,
+					  error->pinned_bo_count[ndx],
+					  &dev_priv->mm.bound_list);
+	error->active_bo[ndx] = active_bo;
+	error->pinned_bo[ndx] = pinned_bo;
+}
+
+static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
+				     struct drm_i915_error_state *error)
+{
+	struct i915_address_space *vm;
+	int cnt = 0, i = 0;
+
+	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
+		cnt++;
+
+	if (WARN(cnt > 1, "Multiple VMs not yet supported\n"))
+		cnt = 1;
+
+	vm = &dev_priv->gtt.base;
+
+	error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
+	error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC);
+	error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count),
+					 GFP_ATOMIC);
+	error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
+					 GFP_ATOMIC);
+
+	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
+		i915_gem_capture_vm(dev_priv, error, vm, i++);
+}
+
+/**
+ * i915_capture_error_state - capture an error record for later analysis
+ * @dev: drm device
+ *
+ * Should be called when an error is detected (either a hang or an error
+ * interrupt) to capture error state from the time of the error.  Fills
+ * out a structure which becomes available in debugfs for user level tools
+ * to pick up.
+ */
+void i915_capture_error_state(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_error_state *error;
+	unsigned long flags;
+	int pipe;
+
+	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+	error = dev_priv->gpu_error.first_error;
+	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
+	if (error)
+		return;
+
+	/* Account for pipe specific data like PIPE*STAT */
+	error = kzalloc(sizeof(*error), GFP_ATOMIC);
+	if (!error) {
+		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
+		return;
+	}
+
+	DRM_INFO("capturing error event; look for more information in "
+		 "/sys/class/drm/card%d/error\n", dev->primary->index);
+
+	kref_init(&error->ref);
+	error->eir = I915_READ(EIR);
+	error->pgtbl_er = I915_READ(PGTBL_ER);
+	if (HAS_HW_CONTEXTS(dev))
+		error->ccid = I915_READ(CCID);
+
+	if (HAS_PCH_SPLIT(dev))
+		error->ier = I915_READ(DEIER) | I915_READ(GTIER);
+	else if (IS_VALLEYVIEW(dev))
+		error->ier = I915_READ(GTIER) | I915_READ(VLV_IER);
+	else if (IS_GEN2(dev))
+		error->ier = I915_READ16(IER);
+	else
+		error->ier = I915_READ(IER);
+
+	if (INTEL_INFO(dev)->gen >= 6)
+		error->derrmr = I915_READ(DERRMR);
+
+	if (IS_VALLEYVIEW(dev))
+		error->forcewake = I915_READ(FORCEWAKE_VLV);
+	else if (INTEL_INFO(dev)->gen >= 7)
+		error->forcewake = I915_READ(FORCEWAKE_MT);
+	else if (INTEL_INFO(dev)->gen == 6)
+		error->forcewake = I915_READ(FORCEWAKE);
+
+	if (!HAS_PCH_SPLIT(dev))
+		for_each_pipe(pipe)
+			error->pipestat[pipe] = I915_READ(PIPESTAT(pipe));
+
+	if (INTEL_INFO(dev)->gen >= 6) {
+		error->error = I915_READ(ERROR_GEN6);
+		error->done_reg = I915_READ(DONE_REG);
+	}
+
+	if (INTEL_INFO(dev)->gen == 7)
+		error->err_int = I915_READ(GEN7_ERR_INT);
+
+	i915_get_extra_instdone(dev, error->extra_instdone);
+
+	i915_gem_capture_buffers(dev_priv, error);
+	i915_gem_record_fences(dev, error);
+	i915_gem_record_rings(dev, error);
+
+	do_gettimeofday(&error->time);
+
+	error->overlay = intel_overlay_capture_error_state(dev);
+	error->display = intel_display_capture_error_state(dev);
+
+	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+	if (dev_priv->gpu_error.first_error == NULL) {
+		dev_priv->gpu_error.first_error = error;
+		error = NULL;
+	}
+	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
+
+	if (error)
+		i915_error_state_free(&error->ref);
+}
+
+void i915_error_state_get(struct drm_device *dev,
+			  struct i915_error_state_file_priv *error_priv)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+	error_priv->error = dev_priv->gpu_error.first_error;
+	if (error_priv->error)
+		kref_get(&error_priv->error->ref);
+	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
+
+}
+
+void i915_error_state_put(struct i915_error_state_file_priv *error_priv)
+{
+	if (error_priv->error)
+		kref_put(&error_priv->error->ref, i915_error_state_free);
+}
+
+void i915_destroy_error_state(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_error_state *error;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
+	error = dev_priv->gpu_error.first_error;
+	dev_priv->gpu_error.first_error = NULL;
+	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
+
+	if (error)
+		kref_put(&error->ref, i915_error_state_free);
+}
+
+const char *i915_cache_level_str(int type)
+{
+	switch (type) {
+	case I915_CACHE_NONE: return " uncached";
+	case I915_CACHE_LLC: return " snooped or LLC";
+	case I915_CACHE_L3_LLC: return " L3+LLC";
+	default: return "";
+	}
+}
+
+/* NB: please notice the memset */
+void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	memset(instdone, 0, sizeof(*instdone) * I915_NUM_INSTDONE_REG);
+
+	switch (INTEL_INFO(dev)->gen) {
+	case 2:
+	case 3:
+		instdone[0] = I915_READ(INSTDONE);
+		break;
+	case 4:
+	case 5:
+	case 6:
+		instdone[0] = I915_READ(INSTDONE_I965);
+		instdone[1] = I915_READ(INSTDONE1);
+		break;
+	default:
+		WARN_ONCE(1, "Unsupported platform\n");
+	case 7:
+		instdone[0] = I915_READ(GEN7_INSTDONE_1);
+		instdone[1] = I915_READ(GEN7_SC_INSTDONE);
+		instdone[2] = I915_READ(GEN7_SAMPLER_INSTDONE);
+		instdone[3] = I915_READ(GEN7_ROW_INSTDONE);
+		break;
+	}
+}
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 3d92a7c..a03b445 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -85,6 +85,12 @@
 {
 	assert_spin_locked(&dev_priv->irq_lock);
 
+	if (dev_priv->pc8.irqs_disabled) {
+		WARN(1, "IRQs disabled\n");
+		dev_priv->pc8.regsave.deimr &= ~mask;
+		return;
+	}
+
 	if ((dev_priv->irq_mask & mask) != 0) {
 		dev_priv->irq_mask &= ~mask;
 		I915_WRITE(DEIMR, dev_priv->irq_mask);
@@ -97,6 +103,12 @@
 {
 	assert_spin_locked(&dev_priv->irq_lock);
 
+	if (dev_priv->pc8.irqs_disabled) {
+		WARN(1, "IRQs disabled\n");
+		dev_priv->pc8.regsave.deimr |= mask;
+		return;
+	}
+
 	if ((dev_priv->irq_mask & mask) != mask) {
 		dev_priv->irq_mask |= mask;
 		I915_WRITE(DEIMR, dev_priv->irq_mask);
@@ -104,6 +116,85 @@
 	}
 }
 
+/**
+ * ilk_update_gt_irq - update GTIMR
+ * @dev_priv: driver private
+ * @interrupt_mask: mask of interrupt bits to update
+ * @enabled_irq_mask: mask of interrupt bits to enable
+ */
+static void ilk_update_gt_irq(struct drm_i915_private *dev_priv,
+			      uint32_t interrupt_mask,
+			      uint32_t enabled_irq_mask)
+{
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	if (dev_priv->pc8.irqs_disabled) {
+		WARN(1, "IRQs disabled\n");
+		dev_priv->pc8.regsave.gtimr &= ~interrupt_mask;
+		dev_priv->pc8.regsave.gtimr |= (~enabled_irq_mask &
+						interrupt_mask);
+		return;
+	}
+
+	dev_priv->gt_irq_mask &= ~interrupt_mask;
+	dev_priv->gt_irq_mask |= (~enabled_irq_mask & interrupt_mask);
+	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
+	POSTING_READ(GTIMR);
+}
+
+void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask)
+{
+	ilk_update_gt_irq(dev_priv, mask, mask);
+}
+
+void ilk_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask)
+{
+	ilk_update_gt_irq(dev_priv, mask, 0);
+}
+
+/**
+  * snb_update_pm_irq - update GEN6_PMIMR
+  * @dev_priv: driver private
+  * @interrupt_mask: mask of interrupt bits to update
+  * @enabled_irq_mask: mask of interrupt bits to enable
+  */
+static void snb_update_pm_irq(struct drm_i915_private *dev_priv,
+			      uint32_t interrupt_mask,
+			      uint32_t enabled_irq_mask)
+{
+	uint32_t new_val;
+
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	if (dev_priv->pc8.irqs_disabled) {
+		WARN(1, "IRQs disabled\n");
+		dev_priv->pc8.regsave.gen6_pmimr &= ~interrupt_mask;
+		dev_priv->pc8.regsave.gen6_pmimr |= (~enabled_irq_mask &
+						     interrupt_mask);
+		return;
+	}
+
+	new_val = dev_priv->pm_irq_mask;
+	new_val &= ~interrupt_mask;
+	new_val |= (~enabled_irq_mask & interrupt_mask);
+
+	if (new_val != dev_priv->pm_irq_mask) {
+		dev_priv->pm_irq_mask = new_val;
+		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
+		POSTING_READ(GEN6_PMIMR);
+	}
+}
+
+void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask)
+{
+	snb_update_pm_irq(dev_priv, mask, mask);
+}
+
+void snb_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask)
+{
+	snb_update_pm_irq(dev_priv, mask, 0);
+}
+
 static bool ivb_can_enable_err_int(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -128,6 +219,8 @@
 	enum pipe pipe;
 	struct intel_crtc *crtc;
 
+	assert_spin_locked(&dev_priv->irq_lock);
+
 	for_each_pipe(pipe) {
 		crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
 
@@ -152,38 +245,75 @@
 }
 
 static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev,
-						  bool enable)
+						  enum pipe pipe, bool enable)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-
 	if (enable) {
+		I915_WRITE(GEN7_ERR_INT, ERR_INT_FIFO_UNDERRUN(pipe));
+
 		if (!ivb_can_enable_err_int(dev))
 			return;
 
-		I915_WRITE(GEN7_ERR_INT, ERR_INT_FIFO_UNDERRUN_A |
-					 ERR_INT_FIFO_UNDERRUN_B |
-					 ERR_INT_FIFO_UNDERRUN_C);
-
 		ironlake_enable_display_irq(dev_priv, DE_ERR_INT_IVB);
 	} else {
+		bool was_enabled = !(I915_READ(DEIMR) & DE_ERR_INT_IVB);
+
+		/* Change the state _after_ we've read out the current one. */
 		ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB);
+
+		if (!was_enabled &&
+		    (I915_READ(GEN7_ERR_INT) & ERR_INT_FIFO_UNDERRUN(pipe))) {
+			DRM_DEBUG_KMS("uncleared fifo underrun on pipe %c\n",
+				      pipe_name(pipe));
+		}
 	}
 }
 
-static void ibx_set_fifo_underrun_reporting(struct intel_crtc *crtc,
+/**
+ * ibx_display_interrupt_update - update SDEIMR
+ * @dev_priv: driver private
+ * @interrupt_mask: mask of interrupt bits to update
+ * @enabled_irq_mask: mask of interrupt bits to enable
+ */
+static void ibx_display_interrupt_update(struct drm_i915_private *dev_priv,
+					 uint32_t interrupt_mask,
+					 uint32_t enabled_irq_mask)
+{
+	uint32_t sdeimr = I915_READ(SDEIMR);
+	sdeimr &= ~interrupt_mask;
+	sdeimr |= (~enabled_irq_mask & interrupt_mask);
+
+	assert_spin_locked(&dev_priv->irq_lock);
+
+	if (dev_priv->pc8.irqs_disabled &&
+	    (interrupt_mask & SDE_HOTPLUG_MASK_CPT)) {
+		WARN(1, "IRQs disabled\n");
+		dev_priv->pc8.regsave.sdeimr &= ~interrupt_mask;
+		dev_priv->pc8.regsave.sdeimr |= (~enabled_irq_mask &
+						 interrupt_mask);
+		return;
+	}
+
+	I915_WRITE(SDEIMR, sdeimr);
+	POSTING_READ(SDEIMR);
+}
+#define ibx_enable_display_interrupt(dev_priv, bits) \
+	ibx_display_interrupt_update((dev_priv), (bits), (bits))
+#define ibx_disable_display_interrupt(dev_priv, bits) \
+	ibx_display_interrupt_update((dev_priv), (bits), 0)
+
+static void ibx_set_fifo_underrun_reporting(struct drm_device *dev,
+					    enum transcoder pch_transcoder,
 					    bool enable)
 {
-	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t bit = (crtc->pipe == PIPE_A) ? SDE_TRANSA_FIFO_UNDER :
-						SDE_TRANSB_FIFO_UNDER;
+	uint32_t bit = (pch_transcoder == TRANSCODER_A) ?
+		       SDE_TRANSA_FIFO_UNDER : SDE_TRANSB_FIFO_UNDER;
 
 	if (enable)
-		I915_WRITE(SDEIMR, I915_READ(SDEIMR) & ~bit);
+		ibx_enable_display_interrupt(dev_priv, bit);
 	else
-		I915_WRITE(SDEIMR, I915_READ(SDEIMR) | bit);
-
-	POSTING_READ(SDEIMR);
+		ibx_disable_display_interrupt(dev_priv, bit);
 }
 
 static void cpt_set_fifo_underrun_reporting(struct drm_device *dev,
@@ -193,19 +323,26 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (enable) {
+		I915_WRITE(SERR_INT,
+			   SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder));
+
 		if (!cpt_can_enable_serr_int(dev))
 			return;
 
-		I915_WRITE(SERR_INT, SERR_INT_TRANS_A_FIFO_UNDERRUN |
-				     SERR_INT_TRANS_B_FIFO_UNDERRUN |
-				     SERR_INT_TRANS_C_FIFO_UNDERRUN);
-
-		I915_WRITE(SDEIMR, I915_READ(SDEIMR) & ~SDE_ERROR_CPT);
+		ibx_enable_display_interrupt(dev_priv, SDE_ERROR_CPT);
 	} else {
-		I915_WRITE(SDEIMR, I915_READ(SDEIMR) | SDE_ERROR_CPT);
-	}
+		uint32_t tmp = I915_READ(SERR_INT);
+		bool was_enabled = !(I915_READ(SDEIMR) & SDE_ERROR_CPT);
 
-	POSTING_READ(SDEIMR);
+		/* Change the state _after_ we've read out the current one. */
+		ibx_disable_display_interrupt(dev_priv, SDE_ERROR_CPT);
+
+		if (!was_enabled &&
+		    (tmp & SERR_INT_TRANS_FIFO_UNDERRUN(pch_transcoder))) {
+			DRM_DEBUG_KMS("uncleared pch fifo underrun on pch transcoder %c\n",
+				      transcoder_name(pch_transcoder));
+		}
+	}
 }
 
 /**
@@ -243,7 +380,7 @@
 	if (IS_GEN5(dev) || IS_GEN6(dev))
 		ironlake_set_fifo_underrun_reporting(dev, pipe, enable);
 	else if (IS_GEN7(dev))
-		ivybridge_set_fifo_underrun_reporting(dev, enable);
+		ivybridge_set_fifo_underrun_reporting(dev, pipe, enable);
 
 done:
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
@@ -269,29 +406,19 @@
 					   bool enable)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	enum pipe p;
-	struct drm_crtc *crtc;
-	struct intel_crtc *intel_crtc;
+	struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pch_transcoder];
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	unsigned long flags;
 	bool ret;
 
-	if (HAS_PCH_LPT(dev)) {
-		crtc = NULL;
-		for_each_pipe(p) {
-			struct drm_crtc *c = dev_priv->pipe_to_crtc_mapping[p];
-			if (intel_pipe_has_type(c, INTEL_OUTPUT_ANALOG)) {
-				crtc = c;
-				break;
-			}
-		}
-		if (!crtc) {
-			DRM_ERROR("PCH FIFO underrun, but no CRTC using the PCH found\n");
-			return false;
-		}
-	} else {
-		crtc = dev_priv->pipe_to_crtc_mapping[pch_transcoder];
-	}
-	intel_crtc = to_intel_crtc(crtc);
+	/*
+	 * NOTE: Pre-LPT has a fixed cpu pipe -> pch transcoder mapping, but LPT
+	 * has only one pch transcoder A that all pipes can use. To avoid racy
+	 * pch transcoder -> pipe lookups from interrupt code simply store the
+	 * underrun statistics in crtc A. Since we never expose this anywhere
+	 * nor use it outside of the fifo underrun code here using the "wrong"
+	 * crtc on LPT won't cause issues.
+	 */
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
 
@@ -303,7 +430,7 @@
 	intel_crtc->pch_fifo_underrun_disabled = !enable;
 
 	if (HAS_PCH_IBX(dev))
-		ibx_set_fifo_underrun_reporting(intel_crtc, enable);
+		ibx_set_fifo_underrun_reporting(dev, pch_transcoder, enable);
 	else
 		cpt_set_fifo_underrun_reporting(dev, pch_transcoder, enable);
 
@@ -319,6 +446,8 @@
 	u32 reg = PIPESTAT(pipe);
 	u32 pipestat = I915_READ(reg) & 0x7fff0000;
 
+	assert_spin_locked(&dev_priv->irq_lock);
+
 	if ((pipestat & mask) == mask)
 		return;
 
@@ -334,6 +463,8 @@
 	u32 reg = PIPESTAT(pipe);
 	u32 pipestat = I915_READ(reg) & 0x7fff0000;
 
+	assert_spin_locked(&dev_priv->irq_lock);
+
 	if ((pipestat & mask) == 0)
 		return;
 
@@ -625,14 +756,13 @@
 		drm_kms_helper_hotplug_event(dev);
 }
 
-static void ironlake_handle_rps_change(struct drm_device *dev)
+static void ironlake_rps_change_irq_handler(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	u32 busy_up, busy_down, max_avg, min_avg;
 	u8 new_delay;
-	unsigned long flags;
 
-	spin_lock_irqsave(&mchdev_lock, flags);
+	spin_lock(&mchdev_lock);
 
 	I915_WRITE16(MEMINTRSTS, I915_READ(MEMINTRSTS));
 
@@ -660,7 +790,7 @@
 	if (ironlake_set_drps(dev, new_delay))
 		dev_priv->ips.cur_delay = new_delay;
 
-	spin_unlock_irqrestore(&mchdev_lock, flags);
+	spin_unlock(&mchdev_lock);
 
 	return;
 }
@@ -668,34 +798,31 @@
 static void notify_ring(struct drm_device *dev,
 			struct intel_ring_buffer *ring)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
 	if (ring->obj == NULL)
 		return;
 
 	trace_i915_gem_request_complete(ring, ring->get_seqno(ring, false));
 
 	wake_up_all(&ring->irq_queue);
-	if (i915_enable_hangcheck) {
-		mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-			  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
-	}
+	i915_queue_hangcheck(dev);
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
 {
 	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
 						    rps.work);
-	u32 pm_iir, pm_imr;
+	u32 pm_iir;
 	u8 new_delay;
 
-	spin_lock_irq(&dev_priv->rps.lock);
+	spin_lock_irq(&dev_priv->irq_lock);
 	pm_iir = dev_priv->rps.pm_iir;
 	dev_priv->rps.pm_iir = 0;
-	pm_imr = I915_READ(GEN6_PMIMR);
 	/* Make sure not to corrupt PMIMR state used by ringbuffer code */
-	I915_WRITE(GEN6_PMIMR, pm_imr & ~GEN6_PM_RPS_EVENTS);
-	spin_unlock_irq(&dev_priv->rps.lock);
+	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+	spin_unlock_irq(&dev_priv->irq_lock);
+
+	/* Make sure we didn't queue anything we're not going to process. */
+	WARN_ON(pm_iir & ~GEN6_PM_RPS_EVENTS);
 
 	if ((pm_iir & GEN6_PM_RPS_EVENTS) == 0)
 		return;
@@ -781,13 +908,12 @@
 	I915_WRITE(GEN7_MISCCPCTL, misccpctl);
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	dev_priv->gt_irq_mask &= ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
+	ilk_enable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 
-	parity_event[0] = "L3_PARITY_ERROR=1";
+	parity_event[0] = I915_L3_PARITY_UEVENT "=1";
 	parity_event[1] = kasprintf(GFP_KERNEL, "ROW=%d", row);
 	parity_event[2] = kasprintf(GFP_KERNEL, "BANK=%d", bank);
 	parity_event[3] = kasprintf(GFP_KERNEL, "SUBBANK=%d", subbank);
@@ -804,22 +930,31 @@
 	kfree(parity_event[1]);
 }
 
-static void ivybridge_handle_parity_error(struct drm_device *dev)
+static void ivybridge_parity_error_irq_handler(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	unsigned long flags;
 
 	if (!HAS_L3_GPU_CACHE(dev))
 		return;
 
-	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	dev_priv->gt_irq_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+	spin_lock(&dev_priv->irq_lock);
+	ilk_disable_gt_irq(dev_priv, GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
+	spin_unlock(&dev_priv->irq_lock);
 
 	queue_work(dev_priv->wq, &dev_priv->l3_parity.error_work);
 }
 
+static void ilk_gt_irq_handler(struct drm_device *dev,
+			       struct drm_i915_private *dev_priv,
+			       u32 gt_iir)
+{
+	if (gt_iir &
+	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
+		notify_ring(dev, &dev_priv->ring[RCS]);
+	if (gt_iir & ILK_BSD_USER_INTERRUPT)
+		notify_ring(dev, &dev_priv->ring[VCS]);
+}
+
 static void snb_gt_irq_handler(struct drm_device *dev,
 			       struct drm_i915_private *dev_priv,
 			       u32 gt_iir)
@@ -841,32 +976,7 @@
 	}
 
 	if (gt_iir & GT_RENDER_L3_PARITY_ERROR_INTERRUPT)
-		ivybridge_handle_parity_error(dev);
-}
-
-/* Legacy way of handling PM interrupts */
-static void gen6_queue_rps_work(struct drm_i915_private *dev_priv,
-				u32 pm_iir)
-{
-	unsigned long flags;
-
-	/*
-	 * IIR bits should never already be set because IMR should
-	 * prevent an interrupt from being shown in IIR. The warning
-	 * displays a case where we've unsafely cleared
-	 * dev_priv->rps.pm_iir. Although missing an interrupt of the same
-	 * type is not a problem, it displays a problem in the logic.
-	 *
-	 * The mask bit in IMR is cleared by dev_priv->rps.work.
-	 */
-
-	spin_lock_irqsave(&dev_priv->rps.lock, flags);
-	dev_priv->rps.pm_iir |= pm_iir;
-	I915_WRITE(GEN6_PMIMR, dev_priv->rps.pm_iir);
-	POSTING_READ(GEN6_PMIMR);
-	spin_unlock_irqrestore(&dev_priv->rps.lock, flags);
-
-	queue_work(dev_priv->wq, &dev_priv->rps.work);
+		ivybridge_parity_error_irq_handler(dev);
 }
 
 #define HPD_STORM_DETECT_PERIOD 1000
@@ -886,6 +996,10 @@
 	spin_lock(&dev_priv->irq_lock);
 	for (i = 1; i < HPD_NUM_PINS; i++) {
 
+		WARN(((hpd[i] & hotplug_trigger) &&
+		      dev_priv->hpd_stats[i].hpd_mark != HPD_ENABLED),
+		     "Received HPD interrupt although disabled\n");
+
 		if (!(hpd[i] & hotplug_trigger) ||
 		    dev_priv->hpd_stats[i].hpd_mark != HPD_ENABLED)
 			continue;
@@ -896,6 +1010,7 @@
 				   + msecs_to_jiffies(HPD_STORM_DETECT_PERIOD))) {
 			dev_priv->hpd_stats[i].hpd_last_jiffies = jiffies;
 			dev_priv->hpd_stats[i].hpd_cnt = 0;
+			DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: 0\n", i);
 		} else if (dev_priv->hpd_stats[i].hpd_cnt > HPD_STORM_THRESHOLD) {
 			dev_priv->hpd_stats[i].hpd_mark = HPD_MARK_DISABLED;
 			dev_priv->hpd_event_bits &= ~(1 << i);
@@ -903,6 +1018,8 @@
 			storm_detected = true;
 		} else {
 			dev_priv->hpd_stats[i].hpd_cnt++;
+			DRM_DEBUG_KMS("Received HPD interrupt on PIN %d - cnt: %d\n", i,
+				      dev_priv->hpd_stats[i].hpd_cnt);
 		}
 	}
 
@@ -928,28 +1045,21 @@
 	wake_up_all(&dev_priv->gmbus_wait_queue);
 }
 
-/* Unlike gen6_queue_rps_work() from which this function is originally derived,
- * we must be able to deal with other PM interrupts. This is complicated because
- * of the way in which we use the masks to defer the RPS work (which for
- * posterity is necessary because of forcewake).
- */
-static void hsw_pm_irq_handler(struct drm_i915_private *dev_priv,
-			       u32 pm_iir)
+/* The RPS events need forcewake, so we add them to a work queue and mask their
+ * IMR bits until the work is done. Other interrupts can be processed without
+ * the work queue. */
+static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 {
-	unsigned long flags;
+	if (pm_iir & GEN6_PM_RPS_EVENTS) {
+		spin_lock(&dev_priv->irq_lock);
+		dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS;
+		snb_disable_pm_irq(dev_priv, pm_iir & GEN6_PM_RPS_EVENTS);
+		spin_unlock(&dev_priv->irq_lock);
 
-	spin_lock_irqsave(&dev_priv->rps.lock, flags);
-	dev_priv->rps.pm_iir |= pm_iir & GEN6_PM_RPS_EVENTS;
-	if (dev_priv->rps.pm_iir) {
-		I915_WRITE(GEN6_PMIMR, dev_priv->rps.pm_iir);
-		/* never want to mask useful interrupts. (also posting read) */
-		WARN_ON(I915_READ_NOTRACE(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS);
-		/* TODO: if queue_work is slow, move it out of the spinlock */
 		queue_work(dev_priv->wq, &dev_priv->rps.work);
 	}
-	spin_unlock_irqrestore(&dev_priv->rps.lock, flags);
 
-	if (pm_iir & ~GEN6_PM_RPS_EVENTS) {
+	if (HAS_VEBOX(dev_priv->dev)) {
 		if (pm_iir & PM_VEBOX_USER_INTERRUPT)
 			notify_ring(dev_priv->dev, &dev_priv->ring[VECS]);
 
@@ -1028,8 +1138,8 @@
 		if (pipe_stats[0] & PIPE_GMBUS_INTERRUPT_STATUS)
 			gmbus_irq_handler(dev);
 
-		if (pm_iir & GEN6_PM_RPS_EVENTS)
-			gen6_queue_rps_work(dev_priv, pm_iir);
+		if (pm_iir)
+			gen6_rps_irq_handler(dev_priv, pm_iir);
 
 		I915_WRITE(GTIIR, gt_iir);
 		I915_WRITE(GEN6_PMIIR, pm_iir);
@@ -1179,163 +1289,9 @@
 		cpt_serr_int_handler(dev);
 }
 
-static irqreturn_t ivybridge_irq_handler(int irq, void *arg)
+static void ilk_display_irq_handler(struct drm_device *dev, u32 de_iir)
 {
-	struct drm_device *dev = (struct drm_device *) arg;
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier = 0;
-	irqreturn_t ret = IRQ_NONE;
-	int i;
-
-	atomic_inc(&dev_priv->irq_received);
-
-	/* We get interrupts on unclaimed registers, so check for this before we
-	 * do any I915_{READ,WRITE}. */
-	if (IS_HASWELL(dev) &&
-	    (I915_READ_NOTRACE(FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
-		DRM_ERROR("Unclaimed register before interrupt\n");
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
-	}
-
-	/* disable master interrupt before clearing iir  */
-	de_ier = I915_READ(DEIER);
-	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
-
-	/* Disable south interrupts. We'll only write to SDEIIR once, so further
-	 * interrupts will will be stored on its back queue, and then we'll be
-	 * able to process them after we restore SDEIER (as soon as we restore
-	 * it, we'll get an interrupt if SDEIIR still has something to process
-	 * due to its back queue). */
-	if (!HAS_PCH_NOP(dev)) {
-		sde_ier = I915_READ(SDEIER);
-		I915_WRITE(SDEIER, 0);
-		POSTING_READ(SDEIER);
-	}
-
-	/* On Haswell, also mask ERR_INT because we don't want to risk
-	 * generating "unclaimed register" interrupts from inside the interrupt
-	 * handler. */
-	if (IS_HASWELL(dev)) {
-		spin_lock(&dev_priv->irq_lock);
-		ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB);
-		spin_unlock(&dev_priv->irq_lock);
-	}
-
-	gt_iir = I915_READ(GTIIR);
-	if (gt_iir) {
-		snb_gt_irq_handler(dev, dev_priv, gt_iir);
-		I915_WRITE(GTIIR, gt_iir);
-		ret = IRQ_HANDLED;
-	}
-
-	de_iir = I915_READ(DEIIR);
-	if (de_iir) {
-		if (de_iir & DE_ERR_INT_IVB)
-			ivb_err_int_handler(dev);
-
-		if (de_iir & DE_AUX_CHANNEL_A_IVB)
-			dp_aux_irq_handler(dev);
-
-		if (de_iir & DE_GSE_IVB)
-			intel_opregion_asle_intr(dev);
-
-		for (i = 0; i < 3; i++) {
-			if (de_iir & (DE_PIPEA_VBLANK_IVB << (5 * i)))
-				drm_handle_vblank(dev, i);
-			if (de_iir & (DE_PLANEA_FLIP_DONE_IVB << (5 * i))) {
-				intel_prepare_page_flip(dev, i);
-				intel_finish_page_flip_plane(dev, i);
-			}
-		}
-
-		/* check event from PCH */
-		if (!HAS_PCH_NOP(dev) && (de_iir & DE_PCH_EVENT_IVB)) {
-			u32 pch_iir = I915_READ(SDEIIR);
-
-			cpt_irq_handler(dev, pch_iir);
-
-			/* clear PCH hotplug event before clear CPU irq */
-			I915_WRITE(SDEIIR, pch_iir);
-		}
-
-		I915_WRITE(DEIIR, de_iir);
-		ret = IRQ_HANDLED;
-	}
-
-	pm_iir = I915_READ(GEN6_PMIIR);
-	if (pm_iir) {
-		if (IS_HASWELL(dev))
-			hsw_pm_irq_handler(dev_priv, pm_iir);
-		else if (pm_iir & GEN6_PM_RPS_EVENTS)
-			gen6_queue_rps_work(dev_priv, pm_iir);
-		I915_WRITE(GEN6_PMIIR, pm_iir);
-		ret = IRQ_HANDLED;
-	}
-
-	if (IS_HASWELL(dev)) {
-		spin_lock(&dev_priv->irq_lock);
-		if (ivb_can_enable_err_int(dev))
-			ironlake_enable_display_irq(dev_priv, DE_ERR_INT_IVB);
-		spin_unlock(&dev_priv->irq_lock);
-	}
-
-	I915_WRITE(DEIER, de_ier);
-	POSTING_READ(DEIER);
-	if (!HAS_PCH_NOP(dev)) {
-		I915_WRITE(SDEIER, sde_ier);
-		POSTING_READ(SDEIER);
-	}
-
-	return ret;
-}
-
-static void ilk_gt_irq_handler(struct drm_device *dev,
-			       struct drm_i915_private *dev_priv,
-			       u32 gt_iir)
-{
-	if (gt_iir &
-	    (GT_RENDER_USER_INTERRUPT | GT_RENDER_PIPECTL_NOTIFY_INTERRUPT))
-		notify_ring(dev, &dev_priv->ring[RCS]);
-	if (gt_iir & ILK_BSD_USER_INTERRUPT)
-		notify_ring(dev, &dev_priv->ring[VCS]);
-}
-
-static irqreturn_t ironlake_irq_handler(int irq, void *arg)
-{
-	struct drm_device *dev = (struct drm_device *) arg;
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	int ret = IRQ_NONE;
-	u32 de_iir, gt_iir, de_ier, pm_iir, sde_ier;
-
-	atomic_inc(&dev_priv->irq_received);
-
-	/* disable master interrupt before clearing iir  */
-	de_ier = I915_READ(DEIER);
-	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
-	POSTING_READ(DEIER);
-
-	/* Disable south interrupts. We'll only write to SDEIIR once, so further
-	 * interrupts will will be stored on its back queue, and then we'll be
-	 * able to process them after we restore SDEIER (as soon as we restore
-	 * it, we'll get an interrupt if SDEIIR still has something to process
-	 * due to its back queue). */
-	sde_ier = I915_READ(SDEIER);
-	I915_WRITE(SDEIER, 0);
-	POSTING_READ(SDEIER);
-
-	de_iir = I915_READ(DEIIR);
-	gt_iir = I915_READ(GTIIR);
-	pm_iir = I915_READ(GEN6_PMIIR);
-
-	if (de_iir == 0 && gt_iir == 0 && (!IS_GEN6(dev) || pm_iir == 0))
-		goto done;
-
-	ret = IRQ_HANDLED;
-
-	if (IS_GEN5(dev))
-		ilk_gt_irq_handler(dev, dev_priv, gt_iir);
-	else
-		snb_gt_irq_handler(dev, dev_priv, gt_iir);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	if (de_iir & DE_AUX_CHANNEL_A)
 		dp_aux_irq_handler(dev);
@@ -1383,21 +1339,127 @@
 		I915_WRITE(SDEIIR, pch_iir);
 	}
 
-	if (IS_GEN5(dev) &&  de_iir & DE_PCU_EVENT)
-		ironlake_handle_rps_change(dev);
+	if (IS_GEN5(dev) && de_iir & DE_PCU_EVENT)
+		ironlake_rps_change_irq_handler(dev);
+}
 
-	if (IS_GEN6(dev) && pm_iir & GEN6_PM_RPS_EVENTS)
-		gen6_queue_rps_work(dev_priv, pm_iir);
+static void ivb_display_irq_handler(struct drm_device *dev, u32 de_iir)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int i;
 
-	I915_WRITE(GTIIR, gt_iir);
-	I915_WRITE(DEIIR, de_iir);
-	I915_WRITE(GEN6_PMIIR, pm_iir);
+	if (de_iir & DE_ERR_INT_IVB)
+		ivb_err_int_handler(dev);
 
-done:
+	if (de_iir & DE_AUX_CHANNEL_A_IVB)
+		dp_aux_irq_handler(dev);
+
+	if (de_iir & DE_GSE_IVB)
+		intel_opregion_asle_intr(dev);
+
+	for (i = 0; i < 3; i++) {
+		if (de_iir & (DE_PIPEA_VBLANK_IVB << (5 * i)))
+			drm_handle_vblank(dev, i);
+		if (de_iir & (DE_PLANEA_FLIP_DONE_IVB << (5 * i))) {
+			intel_prepare_page_flip(dev, i);
+			intel_finish_page_flip_plane(dev, i);
+		}
+	}
+
+	/* check event from PCH */
+	if (!HAS_PCH_NOP(dev) && (de_iir & DE_PCH_EVENT_IVB)) {
+		u32 pch_iir = I915_READ(SDEIIR);
+
+		cpt_irq_handler(dev, pch_iir);
+
+		/* clear PCH hotplug event before clear CPU irq */
+		I915_WRITE(SDEIIR, pch_iir);
+	}
+}
+
+static irqreturn_t ironlake_irq_handler(int irq, void *arg)
+{
+	struct drm_device *dev = (struct drm_device *) arg;
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	u32 de_iir, gt_iir, de_ier, sde_ier = 0;
+	irqreturn_t ret = IRQ_NONE;
+	bool err_int_reenable = false;
+
+	atomic_inc(&dev_priv->irq_received);
+
+	/* We get interrupts on unclaimed registers, so check for this before we
+	 * do any I915_{READ,WRITE}. */
+	intel_uncore_check_errors(dev);
+
+	/* disable master interrupt before clearing iir  */
+	de_ier = I915_READ(DEIER);
+	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
+	POSTING_READ(DEIER);
+
+	/* Disable south interrupts. We'll only write to SDEIIR once, so further
+	 * interrupts will will be stored on its back queue, and then we'll be
+	 * able to process them after we restore SDEIER (as soon as we restore
+	 * it, we'll get an interrupt if SDEIIR still has something to process
+	 * due to its back queue). */
+	if (!HAS_PCH_NOP(dev)) {
+		sde_ier = I915_READ(SDEIER);
+		I915_WRITE(SDEIER, 0);
+		POSTING_READ(SDEIER);
+	}
+
+	/* On Haswell, also mask ERR_INT because we don't want to risk
+	 * generating "unclaimed register" interrupts from inside the interrupt
+	 * handler. */
+	if (IS_HASWELL(dev)) {
+		spin_lock(&dev_priv->irq_lock);
+		err_int_reenable = ~dev_priv->irq_mask & DE_ERR_INT_IVB;
+		if (err_int_reenable)
+			ironlake_disable_display_irq(dev_priv, DE_ERR_INT_IVB);
+		spin_unlock(&dev_priv->irq_lock);
+	}
+
+	gt_iir = I915_READ(GTIIR);
+	if (gt_iir) {
+		if (INTEL_INFO(dev)->gen >= 6)
+			snb_gt_irq_handler(dev, dev_priv, gt_iir);
+		else
+			ilk_gt_irq_handler(dev, dev_priv, gt_iir);
+		I915_WRITE(GTIIR, gt_iir);
+		ret = IRQ_HANDLED;
+	}
+
+	de_iir = I915_READ(DEIIR);
+	if (de_iir) {
+		if (INTEL_INFO(dev)->gen >= 7)
+			ivb_display_irq_handler(dev, de_iir);
+		else
+			ilk_display_irq_handler(dev, de_iir);
+		I915_WRITE(DEIIR, de_iir);
+		ret = IRQ_HANDLED;
+	}
+
+	if (INTEL_INFO(dev)->gen >= 6) {
+		u32 pm_iir = I915_READ(GEN6_PMIIR);
+		if (pm_iir) {
+			gen6_rps_irq_handler(dev_priv, pm_iir);
+			I915_WRITE(GEN6_PMIIR, pm_iir);
+			ret = IRQ_HANDLED;
+		}
+	}
+
+	if (err_int_reenable) {
+		spin_lock(&dev_priv->irq_lock);
+		if (ivb_can_enable_err_int(dev))
+			ironlake_enable_display_irq(dev_priv, DE_ERR_INT_IVB);
+		spin_unlock(&dev_priv->irq_lock);
+	}
+
 	I915_WRITE(DEIER, de_ier);
 	POSTING_READ(DEIER);
-	I915_WRITE(SDEIER, sde_ier);
-	POSTING_READ(SDEIER);
+	if (!HAS_PCH_NOP(dev)) {
+		I915_WRITE(SDEIER, sde_ier);
+		POSTING_READ(SDEIER);
+	}
 
 	return ret;
 }
@@ -1417,9 +1479,9 @@
 						    gpu_error);
 	struct drm_device *dev = dev_priv->dev;
 	struct intel_ring_buffer *ring;
-	char *error_event[] = { "ERROR=1", NULL };
-	char *reset_event[] = { "RESET=1", NULL };
-	char *reset_done_event[] = { "ERROR=0", NULL };
+	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
+	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
+	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
 	int i, ret;
 
 	kobject_uevent_env(&dev->primary->kdev.kobj, KOBJ_CHANGE, error_event);
@@ -1470,535 +1532,6 @@
 	}
 }
 
-/* NB: please notice the memset */
-static void i915_get_extra_instdone(struct drm_device *dev,
-				    uint32_t *instdone)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	memset(instdone, 0, sizeof(*instdone) * I915_NUM_INSTDONE_REG);
-
-	switch(INTEL_INFO(dev)->gen) {
-	case 2:
-	case 3:
-		instdone[0] = I915_READ(INSTDONE);
-		break;
-	case 4:
-	case 5:
-	case 6:
-		instdone[0] = I915_READ(INSTDONE_I965);
-		instdone[1] = I915_READ(INSTDONE1);
-		break;
-	default:
-		WARN_ONCE(1, "Unsupported platform\n");
-	case 7:
-		instdone[0] = I915_READ(GEN7_INSTDONE_1);
-		instdone[1] = I915_READ(GEN7_SC_INSTDONE);
-		instdone[2] = I915_READ(GEN7_SAMPLER_INSTDONE);
-		instdone[3] = I915_READ(GEN7_ROW_INSTDONE);
-		break;
-	}
-}
-
-#ifdef CONFIG_DEBUG_FS
-static struct drm_i915_error_object *
-i915_error_object_create_sized(struct drm_i915_private *dev_priv,
-			       struct drm_i915_gem_object *src,
-			       const int num_pages)
-{
-	struct drm_i915_error_object *dst;
-	int i;
-	u32 reloc_offset;
-
-	if (src == NULL || src->pages == NULL)
-		return NULL;
-
-	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC);
-	if (dst == NULL)
-		return NULL;
-
-	reloc_offset = src->gtt_offset;
-	for (i = 0; i < num_pages; i++) {
-		unsigned long flags;
-		void *d;
-
-		d = kmalloc(PAGE_SIZE, GFP_ATOMIC);
-		if (d == NULL)
-			goto unwind;
-
-		local_irq_save(flags);
-		if (reloc_offset < dev_priv->gtt.mappable_end &&
-		    src->has_global_gtt_mapping) {
-			void __iomem *s;
-
-			/* Simply ignore tiling or any overlapping fence.
-			 * It's part of the error state, and this hopefully
-			 * captures what the GPU read.
-			 */
-
-			s = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
-						     reloc_offset);
-			memcpy_fromio(d, s, PAGE_SIZE);
-			io_mapping_unmap_atomic(s);
-		} else if (src->stolen) {
-			unsigned long offset;
-
-			offset = dev_priv->mm.stolen_base;
-			offset += src->stolen->start;
-			offset += i << PAGE_SHIFT;
-
-			memcpy_fromio(d, (void __iomem *) offset, PAGE_SIZE);
-		} else {
-			struct page *page;
-			void *s;
-
-			page = i915_gem_object_get_page(src, i);
-
-			drm_clflush_pages(&page, 1);
-
-			s = kmap_atomic(page);
-			memcpy(d, s, PAGE_SIZE);
-			kunmap_atomic(s);
-
-			drm_clflush_pages(&page, 1);
-		}
-		local_irq_restore(flags);
-
-		dst->pages[i] = d;
-
-		reloc_offset += PAGE_SIZE;
-	}
-	dst->page_count = num_pages;
-	dst->gtt_offset = src->gtt_offset;
-
-	return dst;
-
-unwind:
-	while (i--)
-		kfree(dst->pages[i]);
-	kfree(dst);
-	return NULL;
-}
-#define i915_error_object_create(dev_priv, src) \
-	i915_error_object_create_sized((dev_priv), (src), \
-				       (src)->base.size>>PAGE_SHIFT)
-
-static void
-i915_error_object_free(struct drm_i915_error_object *obj)
-{
-	int page;
-
-	if (obj == NULL)
-		return;
-
-	for (page = 0; page < obj->page_count; page++)
-		kfree(obj->pages[page]);
-
-	kfree(obj);
-}
-
-void
-i915_error_state_free(struct kref *error_ref)
-{
-	struct drm_i915_error_state *error = container_of(error_ref,
-							  typeof(*error), ref);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
-		i915_error_object_free(error->ring[i].batchbuffer);
-		i915_error_object_free(error->ring[i].ringbuffer);
-		i915_error_object_free(error->ring[i].ctx);
-		kfree(error->ring[i].requests);
-	}
-
-	kfree(error->active_bo);
-	kfree(error->overlay);
-	kfree(error->display);
-	kfree(error);
-}
-static void capture_bo(struct drm_i915_error_buffer *err,
-		       struct drm_i915_gem_object *obj)
-{
-	err->size = obj->base.size;
-	err->name = obj->base.name;
-	err->rseqno = obj->last_read_seqno;
-	err->wseqno = obj->last_write_seqno;
-	err->gtt_offset = obj->gtt_offset;
-	err->read_domains = obj->base.read_domains;
-	err->write_domain = obj->base.write_domain;
-	err->fence_reg = obj->fence_reg;
-	err->pinned = 0;
-	if (obj->pin_count > 0)
-		err->pinned = 1;
-	if (obj->user_pin_count > 0)
-		err->pinned = -1;
-	err->tiling = obj->tiling_mode;
-	err->dirty = obj->dirty;
-	err->purgeable = obj->madv != I915_MADV_WILLNEED;
-	err->ring = obj->ring ? obj->ring->id : -1;
-	err->cache_level = obj->cache_level;
-}
-
-static u32 capture_active_bo(struct drm_i915_error_buffer *err,
-			     int count, struct list_head *head)
-{
-	struct drm_i915_gem_object *obj;
-	int i = 0;
-
-	list_for_each_entry(obj, head, mm_list) {
-		capture_bo(err++, obj);
-		if (++i == count)
-			break;
-	}
-
-	return i;
-}
-
-static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
-			     int count, struct list_head *head)
-{
-	struct drm_i915_gem_object *obj;
-	int i = 0;
-
-	list_for_each_entry(obj, head, global_list) {
-		if (obj->pin_count == 0)
-			continue;
-
-		capture_bo(err++, obj);
-		if (++i == count)
-			break;
-	}
-
-	return i;
-}
-
-static void i915_gem_record_fences(struct drm_device *dev,
-				   struct drm_i915_error_state *error)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int i;
-
-	/* Fences */
-	switch (INTEL_INFO(dev)->gen) {
-	case 7:
-	case 6:
-		for (i = 0; i < dev_priv->num_fence_regs; i++)
-			error->fence[i] = I915_READ64(FENCE_REG_SANDYBRIDGE_0 + (i * 8));
-		break;
-	case 5:
-	case 4:
-		for (i = 0; i < 16; i++)
-			error->fence[i] = I915_READ64(FENCE_REG_965_0 + (i * 8));
-		break;
-	case 3:
-		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
-			for (i = 0; i < 8; i++)
-				error->fence[i+8] = I915_READ(FENCE_REG_945_8 + (i * 4));
-	case 2:
-		for (i = 0; i < 8; i++)
-			error->fence[i] = I915_READ(FENCE_REG_830_0 + (i * 4));
-		break;
-
-	default:
-		BUG();
-	}
-}
-
-static struct drm_i915_error_object *
-i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
-			     struct intel_ring_buffer *ring)
-{
-	struct drm_i915_gem_object *obj;
-	u32 seqno;
-
-	if (!ring->get_seqno)
-		return NULL;
-
-	if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
-		u32 acthd = I915_READ(ACTHD);
-
-		if (WARN_ON(ring->id != RCS))
-			return NULL;
-
-		obj = ring->private;
-		if (acthd >= obj->gtt_offset &&
-		    acthd < obj->gtt_offset + obj->base.size)
-			return i915_error_object_create(dev_priv, obj);
-	}
-
-	seqno = ring->get_seqno(ring, false);
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		if (obj->ring != ring)
-			continue;
-
-		if (i915_seqno_passed(seqno, obj->last_read_seqno))
-			continue;
-
-		if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0)
-			continue;
-
-		/* We need to copy these to an anonymous buffer as the simplest
-		 * method to avoid being overwritten by userspace.
-		 */
-		return i915_error_object_create(dev_priv, obj);
-	}
-
-	return NULL;
-}
-
-static void i915_record_ring_state(struct drm_device *dev,
-				   struct drm_i915_error_state *error,
-				   struct intel_ring_buffer *ring)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (INTEL_INFO(dev)->gen >= 6) {
-		error->rc_psmi[ring->id] = I915_READ(ring->mmio_base + 0x50);
-		error->fault_reg[ring->id] = I915_READ(RING_FAULT_REG(ring));
-		error->semaphore_mboxes[ring->id][0]
-			= I915_READ(RING_SYNC_0(ring->mmio_base));
-		error->semaphore_mboxes[ring->id][1]
-			= I915_READ(RING_SYNC_1(ring->mmio_base));
-		error->semaphore_seqno[ring->id][0] = ring->sync_seqno[0];
-		error->semaphore_seqno[ring->id][1] = ring->sync_seqno[1];
-	}
-
-	if (INTEL_INFO(dev)->gen >= 4) {
-		error->faddr[ring->id] = I915_READ(RING_DMA_FADD(ring->mmio_base));
-		error->ipeir[ring->id] = I915_READ(RING_IPEIR(ring->mmio_base));
-		error->ipehr[ring->id] = I915_READ(RING_IPEHR(ring->mmio_base));
-		error->instdone[ring->id] = I915_READ(RING_INSTDONE(ring->mmio_base));
-		error->instps[ring->id] = I915_READ(RING_INSTPS(ring->mmio_base));
-		if (ring->id == RCS)
-			error->bbaddr = I915_READ64(BB_ADDR);
-	} else {
-		error->faddr[ring->id] = I915_READ(DMA_FADD_I8XX);
-		error->ipeir[ring->id] = I915_READ(IPEIR);
-		error->ipehr[ring->id] = I915_READ(IPEHR);
-		error->instdone[ring->id] = I915_READ(INSTDONE);
-	}
-
-	error->waiting[ring->id] = waitqueue_active(&ring->irq_queue);
-	error->instpm[ring->id] = I915_READ(RING_INSTPM(ring->mmio_base));
-	error->seqno[ring->id] = ring->get_seqno(ring, false);
-	error->acthd[ring->id] = intel_ring_get_active_head(ring);
-	error->head[ring->id] = I915_READ_HEAD(ring);
-	error->tail[ring->id] = I915_READ_TAIL(ring);
-	error->ctl[ring->id] = I915_READ_CTL(ring);
-
-	error->cpu_ring_head[ring->id] = ring->head;
-	error->cpu_ring_tail[ring->id] = ring->tail;
-}
-
-
-static void i915_gem_record_active_context(struct intel_ring_buffer *ring,
-					   struct drm_i915_error_state *error,
-					   struct drm_i915_error_ring *ering)
-{
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct drm_i915_gem_object *obj;
-
-	/* Currently render ring is the only HW context user */
-	if (ring->id != RCS || !error->ccid)
-		return;
-
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
-		if ((error->ccid & PAGE_MASK) == obj->gtt_offset) {
-			ering->ctx = i915_error_object_create_sized(dev_priv,
-								    obj, 1);
-		}
-	}
-}
-
-static void i915_gem_record_rings(struct drm_device *dev,
-				  struct drm_i915_error_state *error)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_ring_buffer *ring;
-	struct drm_i915_gem_request *request;
-	int i, count;
-
-	for_each_ring(ring, dev_priv, i) {
-		i915_record_ring_state(dev, error, ring);
-
-		error->ring[i].batchbuffer =
-			i915_error_first_batchbuffer(dev_priv, ring);
-
-		error->ring[i].ringbuffer =
-			i915_error_object_create(dev_priv, ring->obj);
-
-
-		i915_gem_record_active_context(ring, error, &error->ring[i]);
-
-		count = 0;
-		list_for_each_entry(request, &ring->request_list, list)
-			count++;
-
-		error->ring[i].num_requests = count;
-		error->ring[i].requests =
-			kmalloc(count*sizeof(struct drm_i915_error_request),
-				GFP_ATOMIC);
-		if (error->ring[i].requests == NULL) {
-			error->ring[i].num_requests = 0;
-			continue;
-		}
-
-		count = 0;
-		list_for_each_entry(request, &ring->request_list, list) {
-			struct drm_i915_error_request *erq;
-
-			erq = &error->ring[i].requests[count++];
-			erq->seqno = request->seqno;
-			erq->jiffies = request->emitted_jiffies;
-			erq->tail = request->tail;
-		}
-	}
-}
-
-/**
- * i915_capture_error_state - capture an error record for later analysis
- * @dev: drm device
- *
- * Should be called when an error is detected (either a hang or an error
- * interrupt) to capture error state from the time of the error.  Fills
- * out a structure which becomes available in debugfs for user level tools
- * to pick up.
- */
-static void i915_capture_error_state(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_gem_object *obj;
-	struct drm_i915_error_state *error;
-	unsigned long flags;
-	int i, pipe;
-
-	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
-	error = dev_priv->gpu_error.first_error;
-	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
-	if (error)
-		return;
-
-	/* Account for pipe specific data like PIPE*STAT */
-	error = kzalloc(sizeof(*error), GFP_ATOMIC);
-	if (!error) {
-		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
-		return;
-	}
-
-	DRM_INFO("capturing error event; look for more information in "
-		 "/sys/kernel/debug/dri/%d/i915_error_state\n",
-		 dev->primary->index);
-
-	kref_init(&error->ref);
-	error->eir = I915_READ(EIR);
-	error->pgtbl_er = I915_READ(PGTBL_ER);
-	if (HAS_HW_CONTEXTS(dev))
-		error->ccid = I915_READ(CCID);
-
-	if (HAS_PCH_SPLIT(dev))
-		error->ier = I915_READ(DEIER) | I915_READ(GTIER);
-	else if (IS_VALLEYVIEW(dev))
-		error->ier = I915_READ(GTIER) | I915_READ(VLV_IER);
-	else if (IS_GEN2(dev))
-		error->ier = I915_READ16(IER);
-	else
-		error->ier = I915_READ(IER);
-
-	if (INTEL_INFO(dev)->gen >= 6)
-		error->derrmr = I915_READ(DERRMR);
-
-	if (IS_VALLEYVIEW(dev))
-		error->forcewake = I915_READ(FORCEWAKE_VLV);
-	else if (INTEL_INFO(dev)->gen >= 7)
-		error->forcewake = I915_READ(FORCEWAKE_MT);
-	else if (INTEL_INFO(dev)->gen == 6)
-		error->forcewake = I915_READ(FORCEWAKE);
-
-	if (!HAS_PCH_SPLIT(dev))
-		for_each_pipe(pipe)
-			error->pipestat[pipe] = I915_READ(PIPESTAT(pipe));
-
-	if (INTEL_INFO(dev)->gen >= 6) {
-		error->error = I915_READ(ERROR_GEN6);
-		error->done_reg = I915_READ(DONE_REG);
-	}
-
-	if (INTEL_INFO(dev)->gen == 7)
-		error->err_int = I915_READ(GEN7_ERR_INT);
-
-	i915_get_extra_instdone(dev, error->extra_instdone);
-
-	i915_gem_record_fences(dev, error);
-	i915_gem_record_rings(dev, error);
-
-	/* Record buffers on the active and pinned lists. */
-	error->active_bo = NULL;
-	error->pinned_bo = NULL;
-
-	i = 0;
-	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list)
-		i++;
-	error->active_bo_count = i;
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		if (obj->pin_count)
-			i++;
-	error->pinned_bo_count = i - error->active_bo_count;
-
-	error->active_bo = NULL;
-	error->pinned_bo = NULL;
-	if (i) {
-		error->active_bo = kmalloc(sizeof(*error->active_bo)*i,
-					   GFP_ATOMIC);
-		if (error->active_bo)
-			error->pinned_bo =
-				error->active_bo + error->active_bo_count;
-	}
-
-	if (error->active_bo)
-		error->active_bo_count =
-			capture_active_bo(error->active_bo,
-					  error->active_bo_count,
-					  &dev_priv->mm.active_list);
-
-	if (error->pinned_bo)
-		error->pinned_bo_count =
-			capture_pinned_bo(error->pinned_bo,
-					  error->pinned_bo_count,
-					  &dev_priv->mm.bound_list);
-
-	do_gettimeofday(&error->time);
-
-	error->overlay = intel_overlay_capture_error_state(dev);
-	error->display = intel_display_capture_error_state(dev);
-
-	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
-	if (dev_priv->gpu_error.first_error == NULL) {
-		dev_priv->gpu_error.first_error = error;
-		error = NULL;
-	}
-	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
-
-	if (error)
-		i915_error_state_free(&error->ref);
-}
-
-void i915_destroy_error_state(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_i915_error_state *error;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_priv->gpu_error.lock, flags);
-	error = dev_priv->gpu_error.first_error;
-	dev_priv->gpu_error.first_error = NULL;
-	spin_unlock_irqrestore(&dev_priv->gpu_error.lock, flags);
-
-	if (error)
-		kref_put(&error->ref, i915_error_state_free);
-}
-#else
-#define i915_capture_error_state(x)
-#endif
-
 static void i915_report_and_clear_eir(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2155,10 +1688,10 @@
 	if (INTEL_INFO(dev)->gen >= 4) {
 		int dspsurf = DSPSURF(intel_crtc->plane);
 		stall_detected = I915_HI_DISPBASE(I915_READ(dspsurf)) ==
-					obj->gtt_offset;
+					i915_gem_obj_ggtt_offset(obj);
 	} else {
 		int dspaddr = DSPADDR(intel_crtc->plane);
-		stall_detected = I915_READ(dspaddr) == (obj->gtt_offset +
+		stall_detected = I915_READ(dspaddr) == (i915_gem_obj_ggtt_offset(obj) +
 							crtc->y * crtc->fb->pitches[0] +
 							crtc->x * crtc->fb->bits_per_pixel/8);
 	}
@@ -2202,29 +1735,14 @@
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
+	uint32_t bit = (INTEL_INFO(dev)->gen >= 7) ? DE_PIPE_VBLANK_IVB(pipe) :
+						     DE_PIPE_VBLANK_ILK(pipe);
 
 	if (!i915_pipe_enabled(dev, pipe))
 		return -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_enable_display_irq(dev_priv, (pipe == 0) ?
-				    DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
-
-	return 0;
-}
-
-static int ivybridge_enable_vblank(struct drm_device *dev, int pipe)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	unsigned long irqflags;
-
-	if (!i915_pipe_enabled(dev, pipe))
-		return -EINVAL;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_enable_display_irq(dev_priv,
-				    DE_PIPEA_VBLANK_IVB << (5 * pipe));
+	ironlake_enable_display_irq(dev_priv, bit);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	return 0;
@@ -2275,21 +1793,11 @@
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	unsigned long irqflags;
+	uint32_t bit = (INTEL_INFO(dev)->gen >= 7) ? DE_PIPE_VBLANK_IVB(pipe) :
+						     DE_PIPE_VBLANK_ILK(pipe);
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_disable_display_irq(dev_priv, (pipe == 0) ?
-				     DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
-	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
-}
-
-static void ivybridge_disable_vblank(struct drm_device *dev, int pipe)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	unsigned long irqflags;
-
-	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	ironlake_disable_display_irq(dev_priv,
-				     DE_PIPEA_VBLANK_IVB << (pipe * 5));
+	ironlake_disable_display_irq(dev_priv, bit);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
@@ -2392,10 +1900,10 @@
 	u32 tmp;
 
 	if (ring->hangcheck.acthd != acthd)
-		return active;
+		return HANGCHECK_ACTIVE;
 
 	if (IS_GEN2(dev))
-		return hung;
+		return HANGCHECK_HUNG;
 
 	/* Is the chip hanging on a WAIT_FOR_EVENT?
 	 * If so we can simply poke the RB_WAIT bit
@@ -2407,24 +1915,24 @@
 		DRM_ERROR("Kicking stuck wait on %s\n",
 			  ring->name);
 		I915_WRITE_CTL(ring, tmp);
-		return kick;
+		return HANGCHECK_KICK;
 	}
 
 	if (INTEL_INFO(dev)->gen >= 6 && tmp & RING_WAIT_SEMAPHORE) {
 		switch (semaphore_passed(ring)) {
 		default:
-			return hung;
+			return HANGCHECK_HUNG;
 		case 1:
 			DRM_ERROR("Kicking stuck semaphore on %s\n",
 				  ring->name);
 			I915_WRITE_CTL(ring, tmp);
-			return kick;
+			return HANGCHECK_KICK;
 		case 0:
-			return wait;
+			return HANGCHECK_WAIT;
 		}
 	}
 
-	return hung;
+	return HANGCHECK_HUNG;
 }
 
 /**
@@ -2435,7 +1943,7 @@
  * we kick the ring. If we see no progress on three subsequent calls
  * we assume chip is wedged and try to fix it by resetting the chip.
  */
-void i915_hangcheck_elapsed(unsigned long data)
+static void i915_hangcheck_elapsed(unsigned long data)
 {
 	struct drm_device *dev = (struct drm_device *)data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -2471,8 +1979,6 @@
 				} else
 					busy = false;
 			} else {
-				int score;
-
 				/* We always increment the hangcheck score
 				 * if the ring is busy and still processing
 				 * the same request, so that no single request
@@ -2492,21 +1998,19 @@
 								    acthd);
 
 				switch (ring->hangcheck.action) {
-				case wait:
-					score = 0;
+				case HANGCHECK_WAIT:
 					break;
-				case active:
-					score = BUSY;
+				case HANGCHECK_ACTIVE:
+					ring->hangcheck.score += BUSY;
 					break;
-				case kick:
-					score = KICK;
+				case HANGCHECK_KICK:
+					ring->hangcheck.score += KICK;
 					break;
-				case hung:
-					score = HUNG;
+				case HANGCHECK_HUNG:
+					ring->hangcheck.score += HUNG;
 					stuck[i] = true;
 					break;
 				}
-				ring->hangcheck.score += score;
 			}
 		} else {
 			/* Gradually reduce the count so that we catch DoS
@@ -2536,9 +2040,17 @@
 	if (busy_count)
 		/* Reset timer case chip hangs without another request
 		 * being added */
-		mod_timer(&dev_priv->gpu_error.hangcheck_timer,
-			  round_jiffies_up(jiffies +
-					   DRM_I915_HANGCHECK_JIFFIES));
+		i915_queue_hangcheck(dev);
+}
+
+void i915_queue_hangcheck(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	if (!i915_enable_hangcheck)
+		return;
+
+	mod_timer(&dev_priv->gpu_error.hangcheck_timer,
+		  round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 }
 
 static void ibx_irq_preinstall(struct drm_device *dev)
@@ -2560,6 +2072,23 @@
 	POSTING_READ(SDEIER);
 }
 
+static void gen5_gt_irq_preinstall(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* and GT */
+	I915_WRITE(GTIMR, 0xffffffff);
+	I915_WRITE(GTIER, 0x0);
+	POSTING_READ(GTIER);
+
+	if (INTEL_INFO(dev)->gen >= 6) {
+		/* and PM */
+		I915_WRITE(GEN6_PMIMR, 0xffffffff);
+		I915_WRITE(GEN6_PMIER, 0x0);
+		POSTING_READ(GEN6_PMIER);
+	}
+}
+
 /* drm_dma.h hooks
 */
 static void ironlake_irq_preinstall(struct drm_device *dev)
@@ -2570,43 +2099,11 @@
 
 	I915_WRITE(HWSTAM, 0xeffe);
 
-	/* XXX hotplug from PCH */
-
 	I915_WRITE(DEIMR, 0xffffffff);
 	I915_WRITE(DEIER, 0x0);
 	POSTING_READ(DEIER);
 
-	/* and GT */
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	POSTING_READ(GTIER);
-
-	ibx_irq_preinstall(dev);
-}
-
-static void ivybridge_irq_preinstall(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-
-	atomic_set(&dev_priv->irq_received, 0);
-
-	I915_WRITE(HWSTAM, 0xeffe);
-
-	/* XXX hotplug from PCH */
-
-	I915_WRITE(DEIMR, 0xffffffff);
-	I915_WRITE(DEIER, 0x0);
-	POSTING_READ(DEIER);
-
-	/* and GT */
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	POSTING_READ(GTIER);
-
-	/* Power management */
-	I915_WRITE(GEN6_PMIMR, 0xffffffff);
-	I915_WRITE(GEN6_PMIER, 0x0);
-	POSTING_READ(GEN6_PMIER);
+	gen5_gt_irq_preinstall(dev);
 
 	ibx_irq_preinstall(dev);
 }
@@ -2627,9 +2124,8 @@
 	/* and GT */
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, 0xffffffff);
-	I915_WRITE(GTIER, 0x0);
-	POSTING_READ(GTIER);
+
+	gen5_gt_irq_preinstall(dev);
 
 	I915_WRITE(DPINVGTT, 0xff);
 
@@ -2648,22 +2144,21 @@
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct intel_encoder *intel_encoder;
-	u32 mask = ~I915_READ(SDEIMR);
-	u32 hotplug;
+	u32 hotplug_irqs, hotplug, enabled_irqs = 0;
 
 	if (HAS_PCH_IBX(dev)) {
-		mask &= ~SDE_HOTPLUG_MASK;
+		hotplug_irqs = SDE_HOTPLUG_MASK;
 		list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
 			if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED)
-				mask |= hpd_ibx[intel_encoder->hpd_pin];
+				enabled_irqs |= hpd_ibx[intel_encoder->hpd_pin];
 	} else {
-		mask &= ~SDE_HOTPLUG_MASK_CPT;
+		hotplug_irqs = SDE_HOTPLUG_MASK_CPT;
 		list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
 			if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED)
-				mask |= hpd_cpt[intel_encoder->hpd_pin];
+				enabled_irqs |= hpd_cpt[intel_encoder->hpd_pin];
 	}
 
-	I915_WRITE(SDEIMR, ~mask);
+	ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
 
 	/*
 	 * Enable digital hotplug on the PCH, and configure the DP short pulse
@@ -2700,42 +2195,80 @@
 	I915_WRITE(SDEIMR, ~mask);
 }
 
+static void gen5_gt_irq_postinstall(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 pm_irqs, gt_irqs;
+
+	pm_irqs = gt_irqs = 0;
+
+	dev_priv->gt_irq_mask = ~0;
+	if (HAS_L3_GPU_CACHE(dev)) {
+		/* L3 parity interrupt is always unmasked. */
+		dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+		gt_irqs |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+	}
+
+	gt_irqs |= GT_RENDER_USER_INTERRUPT;
+	if (IS_GEN5(dev)) {
+		gt_irqs |= GT_RENDER_PIPECTL_NOTIFY_INTERRUPT |
+			   ILK_BSD_USER_INTERRUPT;
+	} else {
+		gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
+	}
+
+	I915_WRITE(GTIIR, I915_READ(GTIIR));
+	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
+	I915_WRITE(GTIER, gt_irqs);
+	POSTING_READ(GTIER);
+
+	if (INTEL_INFO(dev)->gen >= 6) {
+		pm_irqs |= GEN6_PM_RPS_EVENTS;
+
+		if (HAS_VEBOX(dev))
+			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
+
+		dev_priv->pm_irq_mask = 0xffffffff;
+		I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
+		I915_WRITE(GEN6_PMIMR, dev_priv->pm_irq_mask);
+		I915_WRITE(GEN6_PMIER, pm_irqs);
+		POSTING_READ(GEN6_PMIER);
+	}
+}
+
 static int ironlake_irq_postinstall(struct drm_device *dev)
 {
 	unsigned long irqflags;
-
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	/* enable kind of interrupts always enabled */
-	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
-			   DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE |
-			   DE_AUX_CHANNEL_A | DE_PIPEB_FIFO_UNDERRUN |
-			   DE_PIPEA_FIFO_UNDERRUN | DE_POISON;
-	u32 gt_irqs;
+	u32 display_mask, extra_mask;
+
+	if (INTEL_INFO(dev)->gen >= 7) {
+		display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE_IVB |
+				DE_PCH_EVENT_IVB | DE_PLANEC_FLIP_DONE_IVB |
+				DE_PLANEB_FLIP_DONE_IVB |
+				DE_PLANEA_FLIP_DONE_IVB | DE_AUX_CHANNEL_A_IVB |
+				DE_ERR_INT_IVB);
+		extra_mask = (DE_PIPEC_VBLANK_IVB | DE_PIPEB_VBLANK_IVB |
+			      DE_PIPEA_VBLANK_IVB);
+
+		I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
+	} else {
+		display_mask = (DE_MASTER_IRQ_CONTROL | DE_GSE | DE_PCH_EVENT |
+				DE_PLANEA_FLIP_DONE | DE_PLANEB_FLIP_DONE |
+				DE_AUX_CHANNEL_A | DE_PIPEB_FIFO_UNDERRUN |
+				DE_PIPEA_FIFO_UNDERRUN | DE_POISON);
+		extra_mask = DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT;
+	}
 
 	dev_priv->irq_mask = ~display_mask;
 
 	/* should always can generate irq */
 	I915_WRITE(DEIIR, I915_READ(DEIIR));
 	I915_WRITE(DEIMR, dev_priv->irq_mask);
-	I915_WRITE(DEIER, display_mask |
-			  DE_PIPEA_VBLANK | DE_PIPEB_VBLANK | DE_PCU_EVENT);
+	I915_WRITE(DEIER, display_mask | extra_mask);
 	POSTING_READ(DEIER);
 
-	dev_priv->gt_irq_mask = ~0;
-
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT;
-
-	if (IS_GEN6(dev))
-		gt_irqs |= GT_BLT_USER_INTERRUPT | GT_BSD_USER_INTERRUPT;
-	else
-		gt_irqs |= GT_RENDER_PIPECTL_NOTIFY_INTERRUPT |
-			   ILK_BSD_USER_INTERRUPT;
-
-	I915_WRITE(GTIER, gt_irqs);
-	POSTING_READ(GTIER);
+	gen5_gt_irq_postinstall(dev);
 
 	ibx_irq_postinstall(dev);
 
@@ -2753,70 +2286,12 @@
 	return 0;
 }
 
-static int ivybridge_irq_postinstall(struct drm_device *dev)
-{
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	/* enable kind of interrupts always enabled */
-	u32 display_mask =
-		DE_MASTER_IRQ_CONTROL | DE_GSE_IVB | DE_PCH_EVENT_IVB |
-		DE_PLANEC_FLIP_DONE_IVB |
-		DE_PLANEB_FLIP_DONE_IVB |
-		DE_PLANEA_FLIP_DONE_IVB |
-		DE_AUX_CHANNEL_A_IVB |
-		DE_ERR_INT_IVB;
-	u32 pm_irqs = GEN6_PM_RPS_EVENTS;
-	u32 gt_irqs;
-
-	dev_priv->irq_mask = ~display_mask;
-
-	/* should always can generate irq */
-	I915_WRITE(GEN7_ERR_INT, I915_READ(GEN7_ERR_INT));
-	I915_WRITE(DEIIR, I915_READ(DEIIR));
-	I915_WRITE(DEIMR, dev_priv->irq_mask);
-	I915_WRITE(DEIER,
-		   display_mask |
-		   DE_PIPEC_VBLANK_IVB |
-		   DE_PIPEB_VBLANK_IVB |
-		   DE_PIPEA_VBLANK_IVB);
-	POSTING_READ(DEIER);
-
-	dev_priv->gt_irq_mask = ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT | GT_BSD_USER_INTERRUPT |
-		  GT_BLT_USER_INTERRUPT | GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
-	I915_WRITE(GTIER, gt_irqs);
-	POSTING_READ(GTIER);
-
-	I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
-	if (HAS_VEBOX(dev))
-		pm_irqs |= PM_VEBOX_USER_INTERRUPT |
-			PM_VEBOX_CS_ERROR_INTERRUPT;
-
-	/* Our enable/disable rps functions may touch these registers so
-	 * make sure to set a known state for only the non-RPS bits.
-	 * The RMW is extra paranoia since this should be called after being set
-	 * to a known state in preinstall.
-	 * */
-	I915_WRITE(GEN6_PMIMR,
-		   (I915_READ(GEN6_PMIMR) | ~GEN6_PM_RPS_EVENTS) & ~pm_irqs);
-	I915_WRITE(GEN6_PMIER,
-		   (I915_READ(GEN6_PMIER) & GEN6_PM_RPS_EVENTS) | pm_irqs);
-	POSTING_READ(GEN6_PMIER);
-
-	ibx_irq_postinstall(dev);
-
-	return 0;
-}
-
 static int valleyview_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
-	u32 gt_irqs;
 	u32 enable_mask;
 	u32 pipestat_enable = PLANE_FLIP_DONE_INT_EN_VLV;
+	unsigned long irqflags;
 
 	enable_mask = I915_DISPLAY_PORT_INTERRUPT;
 	enable_mask |= I915_DISPLAY_PIPE_A_EVENT_INTERRUPT |
@@ -2842,20 +2317,18 @@
 	I915_WRITE(PIPESTAT(1), 0xffff);
 	POSTING_READ(VLV_IER);
 
+	/* Interrupt setup is already guaranteed to be single-threaded, this is
+	 * just to make the assert_spin_locked check happy. */
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
 	i915_enable_pipestat(dev_priv, 0, pipestat_enable);
 	i915_enable_pipestat(dev_priv, 0, PIPE_GMBUS_EVENT_ENABLE);
 	i915_enable_pipestat(dev_priv, 1, pipestat_enable);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	I915_WRITE(VLV_IIR, 0xffffffff);
 	I915_WRITE(VLV_IIR, 0xffffffff);
 
-	I915_WRITE(GTIIR, I915_READ(GTIIR));
-	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-
-	gt_irqs = GT_RENDER_USER_INTERRUPT | GT_BSD_USER_INTERRUPT |
-		GT_BLT_USER_INTERRUPT;
-	I915_WRITE(GTIER, gt_irqs);
-	POSTING_READ(GTIER);
+	gen5_gt_irq_postinstall(dev);
 
 	/* ack & enable invalid PTE error interrupts */
 #if 0 /* FIXME: add support to irq handler for checking these bits */
@@ -3001,7 +2474,6 @@
 	u16 iir, new_iir;
 	u32 pipe_stats[2];
 	unsigned long irqflags;
-	int irq_received;
 	int pipe;
 	u16 flip_mask =
 		I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT |
@@ -3035,7 +2507,6 @@
 					DRM_DEBUG_DRIVER("pipe %c underrun\n",
 							 pipe_name(pipe));
 				I915_WRITE(reg, pipe_stats[pipe]);
-				irq_received = 1;
 			}
 		}
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
@@ -3323,6 +2794,7 @@
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	u32 enable_mask;
 	u32 error_mask;
+	unsigned long irqflags;
 
 	/* Unmask the interrupts that we always want on. */
 	dev_priv->irq_mask = ~(I915_ASLE_INTERRUPT |
@@ -3341,7 +2813,11 @@
 	if (IS_G4X(dev))
 		enable_mask |= I915_BSD_USER_INTERRUPT;
 
+	/* Interrupt setup is already guaranteed to be single-threaded, this is
+	 * just to make the assert_spin_locked check happy. */
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
 	i915_enable_pipestat(dev_priv, 0, PIPE_GMBUS_EVENT_ENABLE);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	/*
 	 * Enable some error detection, note the instruction error mask
@@ -3616,15 +3092,6 @@
 		dev->driver->enable_vblank = valleyview_enable_vblank;
 		dev->driver->disable_vblank = valleyview_disable_vblank;
 		dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
-	} else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) {
-		/* Share uninstall handlers with ILK/SNB */
-		dev->driver->irq_handler = ivybridge_irq_handler;
-		dev->driver->irq_preinstall = ivybridge_irq_preinstall;
-		dev->driver->irq_postinstall = ivybridge_irq_postinstall;
-		dev->driver->irq_uninstall = ironlake_irq_uninstall;
-		dev->driver->enable_vblank = ivybridge_enable_vblank;
-		dev->driver->disable_vblank = ivybridge_disable_vblank;
-		dev_priv->display.hpd_irq_setup = ibx_hpd_irq_setup;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		dev->driver->irq_handler = ironlake_irq_handler;
 		dev->driver->irq_preinstall = ironlake_irq_preinstall;
@@ -3683,3 +3150,67 @@
 		dev_priv->display.hpd_irq_setup(dev);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
+
+/* Disable interrupts so we can allow Package C8+. */
+void hsw_pc8_disable_interrupts(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+
+	dev_priv->pc8.regsave.deimr = I915_READ(DEIMR);
+	dev_priv->pc8.regsave.sdeimr = I915_READ(SDEIMR);
+	dev_priv->pc8.regsave.gtimr = I915_READ(GTIMR);
+	dev_priv->pc8.regsave.gtier = I915_READ(GTIER);
+	dev_priv->pc8.regsave.gen6_pmimr = I915_READ(GEN6_PMIMR);
+
+	ironlake_disable_display_irq(dev_priv, ~DE_PCH_EVENT_IVB);
+	ibx_disable_display_interrupt(dev_priv, ~SDE_HOTPLUG_MASK_CPT);
+	ilk_disable_gt_irq(dev_priv, 0xffffffff);
+	snb_disable_pm_irq(dev_priv, 0xffffffff);
+
+	dev_priv->pc8.irqs_disabled = true;
+
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
+/* Restore interrupts so we can recover from Package C8+. */
+void hsw_pc8_restore_interrupts(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long irqflags;
+	uint32_t val, expected;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+
+	val = I915_READ(DEIMR);
+	expected = ~DE_PCH_EVENT_IVB;
+	WARN(val != expected, "DEIMR is 0x%08x, not 0x%08x\n", val, expected);
+
+	val = I915_READ(SDEIMR) & ~SDE_HOTPLUG_MASK_CPT;
+	expected = ~SDE_HOTPLUG_MASK_CPT;
+	WARN(val != expected, "SDEIMR non-HPD bits are 0x%08x, not 0x%08x\n",
+	     val, expected);
+
+	val = I915_READ(GTIMR);
+	expected = 0xffffffff;
+	WARN(val != expected, "GTIMR is 0x%08x, not 0x%08x\n", val, expected);
+
+	val = I915_READ(GEN6_PMIMR);
+	expected = 0xffffffff;
+	WARN(val != expected, "GEN6_PMIMR is 0x%08x, not 0x%08x\n", val,
+	     expected);
+
+	dev_priv->pc8.irqs_disabled = false;
+
+	ironlake_enable_display_irq(dev_priv, ~dev_priv->pc8.regsave.deimr);
+	ibx_enable_display_interrupt(dev_priv,
+				     ~dev_priv->pc8.regsave.sdeimr &
+				     ~SDE_HOTPLUG_MASK_CPT);
+	ilk_enable_gt_irq(dev_priv, ~dev_priv->pc8.regsave.gtimr);
+	snb_enable_pm_irq(dev_priv, ~dev_priv->pc8.regsave.gen6_pmimr);
+	I915_WRITE(GTIER, dev_priv->pc8.regsave.gtier);
+
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f2326fc..b6a58f7 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -61,6 +61,12 @@
 #define   GC_LOW_FREQUENCY_ENABLE	(1 << 7)
 #define   GC_DISPLAY_CLOCK_190_200_MHZ	(0 << 4)
 #define   GC_DISPLAY_CLOCK_333_MHZ	(4 << 4)
+#define   GC_DISPLAY_CLOCK_267_MHZ_PNV	(0 << 4)
+#define   GC_DISPLAY_CLOCK_333_MHZ_PNV	(1 << 4)
+#define   GC_DISPLAY_CLOCK_444_MHZ_PNV	(2 << 4)
+#define   GC_DISPLAY_CLOCK_200_MHZ_PNV	(5 << 4)
+#define   GC_DISPLAY_CLOCK_133_MHZ_PNV	(6 << 4)
+#define   GC_DISPLAY_CLOCK_167_MHZ_PNV	(7 << 4)
 #define   GC_DISPLAY_CLOCK_MASK		(7 << 4)
 #define   GM45_GC_RENDER_CLOCK_MASK	(0xf << 0)
 #define   GM45_GC_RENDER_CLOCK_266_MHZ	(8 << 0)
@@ -363,6 +369,7 @@
 #define PUNIT_REG_GPU_LFM			0xd3
 #define PUNIT_REG_GPU_FREQ_REQ			0xd4
 #define PUNIT_REG_GPU_FREQ_STS			0xd8
+#define   GENFREQSTATUS				(1<<0)
 #define PUNIT_REG_MEDIA_TURBO_FREQ_REQ		0xdc
 
 #define PUNIT_FUSE_BUS2				0xf6 /* bits 47:40 */
@@ -680,6 +687,7 @@
 #define   ERR_INT_FIFO_UNDERRUN_C	(1<<6)
 #define   ERR_INT_FIFO_UNDERRUN_B	(1<<3)
 #define   ERR_INT_FIFO_UNDERRUN_A	(1<<0)
+#define   ERR_INT_FIFO_UNDERRUN(pipe)	(1<<(pipe*3))
 
 #define FPGA_DBG		0x42300
 #define   FPGA_DBG_RM_NOCLAIM	(1<<31)
@@ -752,6 +760,8 @@
 					will not assert AGPBUSY# and will only
 					be delivered when out of C3. */
 #define   INSTPM_FORCE_ORDERING				(1<<7) /* GEN6+ */
+#define   INSTPM_TLB_INVALIDATE	(1<<9)
+#define   INSTPM_SYNC_FLUSH	(1<<5)
 #define ACTHD	        0x020c8
 #define FW_BLC		0x020d8
 #define FW_BLC2		0x020dc
@@ -1125,7 +1135,8 @@
 #define _DPLL_B	(dev_priv->info->display_mmio_offset + 0x6018)
 #define DPLL(pipe) _PIPE(pipe, _DPLL_A, _DPLL_B)
 #define   DPLL_VCO_ENABLE		(1 << 31)
-#define   DPLL_DVO_HIGH_SPEED		(1 << 30)
+#define   DPLL_SDVO_HIGH_SPEED		(1 << 30)
+#define   DPLL_DVO_2X_MODE		(1 << 30)
 #define   DPLL_EXT_BUFFER_ENABLE_VLV	(1 << 30)
 #define   DPLL_SYNCLOCK_ENABLE		(1 << 29)
 #define   DPLL_REFA_CLK_ENABLE_VLV	(1 << 29)
@@ -1438,6 +1449,8 @@
 #define   MCH_SSKPD_WM0_MASK		0x3f
 #define   MCH_SSKPD_WM0_VAL		0xc
 
+#define MCH_SECP_NRG_STTS		(MCHBAR_MIRROR_BASE_SNB + 0x592c)
+
 /* Clocking configuration register */
 #define CLKCFG			0x10c00
 #define CLKCFG_FSB_400					(5 << 0)	/* hrawclk 100 */
@@ -1694,15 +1707,26 @@
  */
 #define CCID			0x2180
 #define   CCID_EN		(1<<0)
+/*
+ * Notes on SNB/IVB/VLV context size:
+ * - Power context is saved elsewhere (LLC or stolen)
+ * - Ring/execlist context is saved on SNB, not on IVB
+ * - Extended context size already includes render context size
+ * - We always need to follow the extended context size.
+ *   SNB BSpec has comments indicating that we should use the
+ *   render context size instead if execlists are disabled, but
+ *   based on empirical testing that's just nonsense.
+ * - Pipelined/VF state is saved on SNB/IVB respectively
+ * - GT1 size just indicates how much of render context
+ *   doesn't need saving on GT1
+ */
 #define CXT_SIZE		0x21a0
 #define GEN6_CXT_POWER_SIZE(cxt_reg)	((cxt_reg >> 24) & 0x3f)
 #define GEN6_CXT_RING_SIZE(cxt_reg)	((cxt_reg >> 18) & 0x3f)
 #define GEN6_CXT_RENDER_SIZE(cxt_reg)	((cxt_reg >> 12) & 0x3f)
 #define GEN6_CXT_EXTENDED_SIZE(cxt_reg)	((cxt_reg >> 6) & 0x3f)
 #define GEN6_CXT_PIPELINE_SIZE(cxt_reg)	((cxt_reg >> 0) & 0x3f)
-#define GEN6_CXT_TOTAL_SIZE(cxt_reg)	(GEN6_CXT_POWER_SIZE(cxt_reg) + \
-					GEN6_CXT_RING_SIZE(cxt_reg) + \
-					GEN6_CXT_RENDER_SIZE(cxt_reg) + \
+#define GEN6_CXT_TOTAL_SIZE(cxt_reg)	(GEN6_CXT_RING_SIZE(cxt_reg) + \
 					GEN6_CXT_EXTENDED_SIZE(cxt_reg) + \
 					GEN6_CXT_PIPELINE_SIZE(cxt_reg))
 #define GEN7_CXT_SIZE		0x21a8
@@ -1712,11 +1736,7 @@
 #define GEN7_CXT_EXTENDED_SIZE(ctx_reg)	((ctx_reg >> 9) & 0x7f)
 #define GEN7_CXT_GT1_SIZE(ctx_reg)	((ctx_reg >> 6) & 0x7)
 #define GEN7_CXT_VFSTATE_SIZE(ctx_reg)	((ctx_reg >> 0) & 0x3f)
-#define GEN7_CXT_TOTAL_SIZE(ctx_reg)	(GEN7_CXT_POWER_SIZE(ctx_reg) + \
-					 GEN7_CXT_RING_SIZE(ctx_reg) + \
-					 GEN7_CXT_RENDER_SIZE(ctx_reg) + \
-					 GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
-					 GEN7_CXT_GT1_SIZE(ctx_reg) + \
+#define GEN7_CXT_TOTAL_SIZE(ctx_reg)	(GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
 					 GEN7_CXT_VFSTATE_SIZE(ctx_reg))
 /* Haswell does have the CXT_SIZE register however it does not appear to be
  * valid. Now, docs explain in dwords what is in the context object. The full
@@ -1776,6 +1796,71 @@
 #define BCLRPAT(pipe) _PIPE(pipe, _BCLRPAT_A, _BCLRPAT_B)
 #define VSYNCSHIFT(trans) _TRANSCODER(trans, _VSYNCSHIFT_A, _VSYNCSHIFT_B)
 
+/* HSW eDP PSR registers */
+#define EDP_PSR_CTL				0x64800
+#define   EDP_PSR_ENABLE			(1<<31)
+#define   EDP_PSR_LINK_DISABLE			(0<<27)
+#define   EDP_PSR_LINK_STANDBY			(1<<27)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_MASK	(3<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES	(0<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_4_LINES	(1<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_2_LINES	(2<<25)
+#define   EDP_PSR_MIN_LINK_ENTRY_TIME_0_LINES	(3<<25)
+#define   EDP_PSR_MAX_SLEEP_TIME_SHIFT		20
+#define   EDP_PSR_SKIP_AUX_EXIT			(1<<12)
+#define   EDP_PSR_TP1_TP2_SEL			(0<<11)
+#define   EDP_PSR_TP1_TP3_SEL			(1<<11)
+#define   EDP_PSR_TP2_TP3_TIME_500us		(0<<8)
+#define   EDP_PSR_TP2_TP3_TIME_100us		(1<<8)
+#define   EDP_PSR_TP2_TP3_TIME_2500us		(2<<8)
+#define   EDP_PSR_TP2_TP3_TIME_0us		(3<<8)
+#define   EDP_PSR_TP1_TIME_500us		(0<<4)
+#define   EDP_PSR_TP1_TIME_100us		(1<<4)
+#define   EDP_PSR_TP1_TIME_2500us		(2<<4)
+#define   EDP_PSR_TP1_TIME_0us			(3<<4)
+#define   EDP_PSR_IDLE_FRAME_SHIFT		0
+
+#define EDP_PSR_AUX_CTL			0x64810
+#define EDP_PSR_AUX_DATA1		0x64814
+#define   EDP_PSR_DPCD_COMMAND		0x80060000
+#define EDP_PSR_AUX_DATA2		0x64818
+#define   EDP_PSR_DPCD_NORMAL_OPERATION	(1<<24)
+#define EDP_PSR_AUX_DATA3		0x6481c
+#define EDP_PSR_AUX_DATA4		0x64820
+#define EDP_PSR_AUX_DATA5		0x64824
+
+#define EDP_PSR_STATUS_CTL			0x64840
+#define   EDP_PSR_STATUS_STATE_MASK		(7<<29)
+#define   EDP_PSR_STATUS_STATE_IDLE		(0<<29)
+#define   EDP_PSR_STATUS_STATE_SRDONACK		(1<<29)
+#define   EDP_PSR_STATUS_STATE_SRDENT		(2<<29)
+#define   EDP_PSR_STATUS_STATE_BUFOFF		(3<<29)
+#define   EDP_PSR_STATUS_STATE_BUFON		(4<<29)
+#define   EDP_PSR_STATUS_STATE_AUXACK		(5<<29)
+#define   EDP_PSR_STATUS_STATE_SRDOFFACK	(6<<29)
+#define   EDP_PSR_STATUS_LINK_MASK		(3<<26)
+#define   EDP_PSR_STATUS_LINK_FULL_OFF		(0<<26)
+#define   EDP_PSR_STATUS_LINK_FULL_ON		(1<<26)
+#define   EDP_PSR_STATUS_LINK_STANDBY		(2<<26)
+#define   EDP_PSR_STATUS_MAX_SLEEP_TIMER_SHIFT	20
+#define   EDP_PSR_STATUS_MAX_SLEEP_TIMER_MASK	0x1f
+#define   EDP_PSR_STATUS_COUNT_SHIFT		16
+#define   EDP_PSR_STATUS_COUNT_MASK		0xf
+#define   EDP_PSR_STATUS_AUX_ERROR		(1<<15)
+#define   EDP_PSR_STATUS_AUX_SENDING		(1<<12)
+#define   EDP_PSR_STATUS_SENDING_IDLE		(1<<9)
+#define   EDP_PSR_STATUS_SENDING_TP2_TP3	(1<<8)
+#define   EDP_PSR_STATUS_SENDING_TP1		(1<<4)
+#define   EDP_PSR_STATUS_IDLE_MASK		0xf
+
+#define EDP_PSR_PERF_CNT		0x64844
+#define   EDP_PSR_PERF_CNT_MASK		0xffffff
+
+#define EDP_PSR_DEBUG_CTL		0x64860
+#define   EDP_PSR_DEBUG_MASK_LPSP	(1<<27)
+#define   EDP_PSR_DEBUG_MASK_MEMUP	(1<<26)
+#define   EDP_PSR_DEBUG_MASK_HPD	(1<<25)
+
 /* VGA port control */
 #define ADPA			0x61100
 #define PCH_ADPA                0xe1100
@@ -1856,10 +1941,16 @@
 #define CRT_HOTPLUG_DETECT_VOLTAGE_475MV	(1 << 2)
 
 #define PORT_HOTPLUG_STAT	(dev_priv->info->display_mmio_offset + 0x61114)
-/* HDMI/DP bits are gen4+ */
-#define   PORTB_HOTPLUG_LIVE_STATUS               (1 << 29)
+/*
+ * HDMI/DP bits are gen4+
+ *
+ * WARNING: Bspec for hpd status bits on gen4 seems to be completely confused.
+ * Please check the detailed lore in the commit message for for experimental
+ * evidence.
+ */
+#define   PORTD_HOTPLUG_LIVE_STATUS               (1 << 29)
 #define   PORTC_HOTPLUG_LIVE_STATUS               (1 << 28)
-#define   PORTD_HOTPLUG_LIVE_STATUS               (1 << 27)
+#define   PORTB_HOTPLUG_LIVE_STATUS               (1 << 27)
 #define   PORTD_HOTPLUG_INT_STATUS		(3 << 21)
 #define   PORTC_HOTPLUG_INT_STATUS		(3 << 19)
 #define   PORTB_HOTPLUG_INT_STATUS		(3 << 17)
@@ -2045,6 +2136,7 @@
  * (Haswell and newer) to see which VIDEO_DIP_DATA byte corresponds to each byte
  * of the infoframe structure specified by CEA-861. */
 #define   VIDEO_DIP_DATA_SIZE	32
+#define   VIDEO_DIP_VSC_DATA_SIZE	36
 #define VIDEO_DIP_CTL		0x61170
 /* Pre HSW: */
 #define   VIDEO_DIP_ENABLE		(1 << 31)
@@ -2192,6 +2284,8 @@
 #define BLC_PWM_CPU_CTL2	0x48250
 #define BLC_PWM_CPU_CTL		0x48254
 
+#define HSW_BLC_PWM2_CTL	0x48350
+
 /* PCH CTL1 is totally different, all but the below bits are reserved. CTL2 is
  * like the normal CTL from gen4 and earlier. Hooray for confusing naming. */
 #define BLC_PWM_PCH_CTL1	0xc8250
@@ -2200,6 +2294,12 @@
 #define   BLM_PCH_POLARITY			(1 << 29)
 #define BLC_PWM_PCH_CTL2	0xc8254
 
+#define UTIL_PIN_CTL		0x48400
+#define   UTIL_PIN_ENABLE	(1 << 31)
+
+#define PCH_GTC_CTL		0xe7000
+#define   PCH_GTC_ENABLE	(1 << 31)
+
 /* TV port control */
 #define TV_CTL			0x68000
 /** Enables the TV encoder */
@@ -3113,9 +3213,6 @@
 #define  MLTR_WM2_SHIFT		8
 /* the unit of memory self-refresh latency time is 0.5us */
 #define  ILK_SRLT_MASK		0x3f
-#define ILK_LATENCY(shift)	(I915_READ(MLTR_ILK) >> (shift) & ILK_SRLT_MASK)
-#define ILK_READ_WM1_LATENCY()	ILK_LATENCY(MLTR_WM1_SHIFT)
-#define ILK_READ_WM2_LATENCY()	ILK_LATENCY(MLTR_WM2_SHIFT)
 
 /* define the fifo size on Ironlake */
 #define ILK_DISPLAY_FIFO	128
@@ -3162,12 +3259,6 @@
 #define SSKPD_WM2_SHIFT		16
 #define SSKPD_WM3_SHIFT		24
 
-#define SNB_LATENCY(shift)	(I915_READ(MCHBAR_MIRROR_BASE_SNB + SSKPD) >> (shift) & SSKPD_WM_MASK)
-#define SNB_READ_WM0_LATENCY()		SNB_LATENCY(SSKPD_WM0_SHIFT)
-#define SNB_READ_WM1_LATENCY()		SNB_LATENCY(SSKPD_WM1_SHIFT)
-#define SNB_READ_WM2_LATENCY()		SNB_LATENCY(SSKPD_WM2_SHIFT)
-#define SNB_READ_WM3_LATENCY()		SNB_LATENCY(SSKPD_WM3_SHIFT)
-
 /*
  * The two pipe frame counter registers are not synchronized, so
  * reading a stable value is somewhat tricky. The following code
@@ -3718,6 +3809,9 @@
 #define DE_PLANEA_FLIP_DONE_IVB		(1<<3)
 #define DE_PIPEA_VBLANK_IVB		(1<<0)
 
+#define DE_PIPE_VBLANK_ILK(pipe)	(1 << ((pipe * 8) + 7))
+#define DE_PIPE_VBLANK_IVB(pipe)	(1 << (pipe * 5))
+
 #define VLV_MASTER_IER			0x4400c /* Gunit master IER */
 #define   MASTER_INTERRUPT_ENABLE	(1<<31)
 
@@ -3880,6 +3974,7 @@
 #define  SERR_INT_TRANS_C_FIFO_UNDERRUN	(1<<6)
 #define  SERR_INT_TRANS_B_FIFO_UNDERRUN	(1<<3)
 #define  SERR_INT_TRANS_A_FIFO_UNDERRUN	(1<<0)
+#define  SERR_INT_TRANS_FIFO_UNDERRUN(pipe)	(1<<(pipe*3))
 
 /* digital port hotplug */
 #define PCH_PORT_HOTPLUG        0xc4030		/* SHOTPLUG_CTL */
@@ -4073,6 +4168,8 @@
 	 _TRANSCODER(trans, HSW_VIDEO_DIP_CTL_A, HSW_VIDEO_DIP_CTL_B)
 #define HSW_TVIDEO_DIP_AVI_DATA(trans) \
 	 _TRANSCODER(trans, HSW_VIDEO_DIP_AVI_DATA_A, HSW_VIDEO_DIP_AVI_DATA_B)
+#define HSW_TVIDEO_DIP_VS_DATA(trans) \
+	 _TRANSCODER(trans, HSW_VIDEO_DIP_VS_DATA_A, HSW_VIDEO_DIP_VS_DATA_B)
 #define HSW_TVIDEO_DIP_SPD_DATA(trans) \
 	 _TRANSCODER(trans, HSW_VIDEO_DIP_SPD_DATA_A, HSW_VIDEO_DIP_SPD_DATA_B)
 #define HSW_TVIDEO_DIP_GCP(trans) \
@@ -4080,6 +4177,13 @@
 #define HSW_TVIDEO_DIP_VSC_DATA(trans) \
 	 _TRANSCODER(trans, HSW_VIDEO_DIP_VSC_DATA_A, HSW_VIDEO_DIP_VSC_DATA_B)
 
+#define HSW_STEREO_3D_CTL_A	0x70020
+#define   S3D_ENABLE		(1<<31)
+#define HSW_STEREO_3D_CTL_B	0x71020
+
+#define HSW_STEREO_3D_CTL(trans) \
+	_TRANSCODER(trans, HSW_STEREO_3D_CTL_A, HSW_STEREO_3D_CTL_A)
+
 #define _PCH_TRANS_HTOTAL_B          0xe1000
 #define _PCH_TRANS_HBLANK_B          0xe1004
 #define _PCH_TRANS_HSYNC_B           0xe1008
@@ -4432,7 +4536,7 @@
 #define EDP_LINK_TRAIN_600MV_0DB_IVB		(0x30 <<22)
 #define EDP_LINK_TRAIN_600MV_3_5DB_IVB		(0x36 <<22)
 #define EDP_LINK_TRAIN_800MV_0DB_IVB		(0x38 <<22)
-#define EDP_LINK_TRAIN_800MV_3_5DB_IVB		(0x33 <<22)
+#define EDP_LINK_TRAIN_800MV_3_5DB_IVB		(0x3e <<22)
 
 /* legacy values */
 #define EDP_LINK_TRAIN_500MV_0DB_IVB		(0x00 <<22)
@@ -4468,6 +4572,10 @@
 #define  GT_FIFO_FREE_ENTRIES			0x120008
 #define    GT_FIFO_NUM_RESERVED_ENTRIES		20
 
+#define  HSW_IDICR				0x9008
+#define    IDIHASHMSK(x)			(((x) & 0x3f) << 16)
+#define  HSW_EDRAM_PRESENT			0x120010
+
 #define GEN6_UCGCTL1				0x9400
 # define GEN6_BLBUNIT_CLOCK_GATE_DISABLE		(1 << 5)
 # define GEN6_CSUNIT_CLOCK_GATE_DISABLE			(1 << 7)
@@ -4736,8 +4844,8 @@
 #define HSW_PWR_WELL_DRIVER			0x45404 /* CTL2 */
 #define HSW_PWR_WELL_KVMR			0x45408 /* CTL3 */
 #define HSW_PWR_WELL_DEBUG			0x4540C /* CTL4 */
-#define   HSW_PWR_WELL_ENABLE			(1<<31)
-#define   HSW_PWR_WELL_STATE			(1<<30)
+#define   HSW_PWR_WELL_ENABLE_REQUEST		(1<<31)
+#define   HSW_PWR_WELL_STATE_ENABLED		(1<<30)
 #define HSW_PWR_WELL_CTL5			0x45410
 #define   HSW_PWR_WELL_ENABLE_SINGLE_STEP	(1<<31)
 #define   HSW_PWR_WELL_PWR_GATE_OVERRIDE	(1<<20)
@@ -4858,7 +4966,8 @@
 #define  SBI_SSCAUXDIV6				0x0610
 #define   SBI_SSCAUXDIV_FINALDIV2SEL(x)		((x)<<4)
 #define  SBI_DBUFF0				0x2a00
-#define   SBI_DBUFF0_ENABLE			(1<<0)
+#define  SBI_GEN0				0x1f00
+#define   SBI_GEN0_CFG_BUFFENABLE_DISABLE	(1<<0)
 
 /* LPT PIXCLK_GATE */
 #define PIXCLK_GATE			0xC6020
@@ -4924,7 +5033,14 @@
 #define  LCPLL_CLK_FREQ_450		(0<<26)
 #define  LCPLL_CD_CLOCK_DISABLE		(1<<25)
 #define  LCPLL_CD2X_CLOCK_DISABLE	(1<<23)
+#define  LCPLL_POWER_DOWN_ALLOW		(1<<22)
 #define  LCPLL_CD_SOURCE_FCLK		(1<<21)
+#define  LCPLL_CD_SOURCE_FCLK_DONE	(1<<19)
+
+#define D_COMP				(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
+#define  D_COMP_RCOMP_IN_PROGRESS	(1<<9)
+#define  D_COMP_COMP_FORCE		(1<<8)
+#define  D_COMP_COMP_DISABLE		(1<<0)
 
 /* Pipe WM_LINETIME - watermark line time */
 #define PIPE_WM_LINETIME_A		0x45270
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 6875b56..a777e7f 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -409,6 +409,71 @@
 	NULL,
 };
 
+static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
+				struct bin_attribute *attr, char *buf,
+				loff_t off, size_t count)
+{
+
+	struct device *kdev = container_of(kobj, struct device, kobj);
+	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
+	struct drm_device *dev = minor->dev;
+	struct i915_error_state_file_priv error_priv;
+	struct drm_i915_error_state_buf error_str;
+	ssize_t ret_count = 0;
+	int ret;
+
+	memset(&error_priv, 0, sizeof(error_priv));
+
+	ret = i915_error_state_buf_init(&error_str, count, off);
+	if (ret)
+		return ret;
+
+	error_priv.dev = dev;
+	i915_error_state_get(dev, &error_priv);
+
+	ret = i915_error_state_to_str(&error_str, &error_priv);
+	if (ret)
+		goto out;
+
+	ret_count = count < error_str.bytes ? count : error_str.bytes;
+
+	memcpy(buf, error_str.buf, ret_count);
+out:
+	i915_error_state_put(&error_priv);
+	i915_error_state_buf_release(&error_str);
+
+	return ret ?: ret_count;
+}
+
+static ssize_t error_state_write(struct file *file, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
+{
+	struct device *kdev = container_of(kobj, struct device, kobj);
+	struct drm_minor *minor = container_of(kdev, struct drm_minor, kdev);
+	struct drm_device *dev = minor->dev;
+	int ret;
+
+	DRM_DEBUG_DRIVER("Resetting error state\n");
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	i915_destroy_error_state(dev);
+	mutex_unlock(&dev->struct_mutex);
+
+	return count;
+}
+
+static struct bin_attribute error_state_attr = {
+	.attr.name = "error",
+	.attr.mode = S_IRUSR | S_IWUSR,
+	.size = 0,
+	.read = error_state_read,
+	.write = error_state_write,
+};
+
 void i915_setup_sysfs(struct drm_device *dev)
 {
 	int ret;
@@ -432,10 +497,16 @@
 		if (ret)
 			DRM_ERROR("gen6 sysfs setup failed\n");
 	}
+
+	ret = sysfs_create_bin_file(&dev->primary->kdev.kobj,
+				    &error_state_attr);
+	if (ret)
+		DRM_ERROR("error_state sysfs setup failed\n");
 }
 
 void i915_teardown_sysfs(struct drm_device *dev)
 {
+	sysfs_remove_bin_file(&dev->primary->kdev.kobj, &error_state_attr);
 	sysfs_remove_files(&dev->primary->kdev.kobj, gen6_attrs);
 	device_remove_bin_file(&dev->primary->kdev,  &dpf_attrs);
 #ifdef CONFIG_PM
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 3db4a68..e2c5ee6 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -33,47 +33,52 @@
 	    TP_printk("obj=%p, size=%u", __entry->obj, __entry->size)
 );
 
-TRACE_EVENT(i915_gem_object_bind,
-	    TP_PROTO(struct drm_i915_gem_object *obj, bool mappable),
-	    TP_ARGS(obj, mappable),
+TRACE_EVENT(i915_vma_bind,
+	    TP_PROTO(struct i915_vma *vma, bool mappable),
+	    TP_ARGS(vma, mappable),
 
 	    TP_STRUCT__entry(
 			     __field(struct drm_i915_gem_object *, obj)
+			     __field(struct i915_address_space *, vm)
 			     __field(u32, offset)
 			     __field(u32, size)
 			     __field(bool, mappable)
 			     ),
 
 	    TP_fast_assign(
-			   __entry->obj = obj;
-			   __entry->offset = obj->gtt_space->start;
-			   __entry->size = obj->gtt_space->size;
+			   __entry->obj = vma->obj;
+			   __entry->vm = vma->vm;
+			   __entry->offset = vma->node.start;
+			   __entry->size = vma->node.size;
 			   __entry->mappable = mappable;
 			   ),
 
-	    TP_printk("obj=%p, offset=%08x size=%x%s",
+	    TP_printk("obj=%p, offset=%08x size=%x%s vm=%p",
 		      __entry->obj, __entry->offset, __entry->size,
-		      __entry->mappable ? ", mappable" : "")
+		      __entry->mappable ? ", mappable" : "",
+		      __entry->vm)
 );
 
-TRACE_EVENT(i915_gem_object_unbind,
-	    TP_PROTO(struct drm_i915_gem_object *obj),
-	    TP_ARGS(obj),
+TRACE_EVENT(i915_vma_unbind,
+	    TP_PROTO(struct i915_vma *vma),
+	    TP_ARGS(vma),
 
 	    TP_STRUCT__entry(
 			     __field(struct drm_i915_gem_object *, obj)
+			     __field(struct i915_address_space *, vm)
 			     __field(u32, offset)
 			     __field(u32, size)
 			     ),
 
 	    TP_fast_assign(
-			   __entry->obj = obj;
-			   __entry->offset = obj->gtt_space->start;
-			   __entry->size = obj->gtt_space->size;
+			   __entry->obj = vma->obj;
+			   __entry->vm = vma->vm;
+			   __entry->offset = vma->node.start;
+			   __entry->size = vma->node.size;
 			   ),
 
-	    TP_printk("obj=%p, offset=%08x size=%x",
-		      __entry->obj, __entry->offset, __entry->size)
+	    TP_printk("obj=%p, offset=%08x size=%x vm=%p",
+		      __entry->obj, __entry->offset, __entry->size, __entry->vm)
 );
 
 TRACE_EVENT(i915_gem_object_change_domain,
@@ -406,10 +411,12 @@
 	    TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj)
 );
 
-TRACE_EVENT(i915_reg_rw,
-	TP_PROTO(bool write, u32 reg, u64 val, int len),
+TRACE_EVENT_CONDITION(i915_reg_rw,
+	TP_PROTO(bool write, u32 reg, u64 val, int len, bool trace),
 
-	TP_ARGS(write, reg, val, len),
+	TP_ARGS(write, reg, val, len, trace),
+
+	TP_CONDITION(trace),
 
 	TP_STRUCT__entry(
 		__field(u64, val)
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index bcbbaea..57fe1ae 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -28,7 +28,7 @@
 	0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c
 };
 
-static int intel_dsm(acpi_handle handle, int func, int arg)
+static int intel_dsm(acpi_handle handle, int func)
 {
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct acpi_object_list input;
@@ -46,8 +46,9 @@
 	params[1].integer.value = INTEL_DSM_REVISION_ID;
 	params[2].type = ACPI_TYPE_INTEGER;
 	params[2].integer.value = func;
-	params[3].type = ACPI_TYPE_INTEGER;
-	params[3].integer.value = arg;
+	params[3].type = ACPI_TYPE_PACKAGE;
+	params[3].package.count = 0;
+	params[3].package.elements = NULL;
 
 	ret = acpi_evaluate_object(handle, "_DSM", &input, &output);
 	if (ret) {
@@ -151,8 +152,9 @@
 	params[1].integer.value = INTEL_DSM_REVISION_ID;
 	params[2].type = ACPI_TYPE_INTEGER;
 	params[2].integer.value = INTEL_DSM_FN_PLATFORM_MUX_INFO;
-	params[3].type = ACPI_TYPE_INTEGER;
-	params[3].integer.value = 0;
+	params[3].type = ACPI_TYPE_PACKAGE;
+	params[3].package.count = 0;
+	params[3].package.elements = NULL;
 
 	ret = acpi_evaluate_object(intel_dsm_priv.dhandle, "_DSM", &input,
 				   &output);
@@ -205,7 +207,7 @@
 		return false;
 	}
 
-	ret = intel_dsm(dhandle, INTEL_DSM_FN_SUPPORTED_FUNCTIONS, 0);
+	ret = intel_dsm(dhandle, INTEL_DSM_FN_SUPPORTED_FUNCTIONS);
 	if (ret < 0) {
 		DRM_DEBUG_KMS("failed to get supported _DSM functions\n");
 		return false;
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 3acec8c..b5a3875 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -52,17 +52,16 @@
 	u32 adpa_reg;
 };
 
-static struct intel_crt *intel_attached_crt(struct drm_connector *connector)
-{
-	return container_of(intel_attached_encoder(connector),
-			    struct intel_crt, base);
-}
-
 static struct intel_crt *intel_encoder_to_crt(struct intel_encoder *encoder)
 {
 	return container_of(encoder, struct intel_crt, base);
 }
 
+static struct intel_crt *intel_attached_crt(struct drm_connector *connector)
+{
+	return intel_encoder_to_crt(intel_attached_encoder(connector));
+}
+
 static bool intel_crt_get_hw_state(struct intel_encoder *encoder,
 				   enum pipe *pipe)
 {
@@ -238,17 +237,14 @@
 	return true;
 }
 
-static void intel_crt_mode_set(struct drm_encoder *encoder,
-			       struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted_mode)
+static void intel_crt_mode_set(struct intel_encoder *encoder)
 {
 
-	struct drm_device *dev = encoder->dev;
-	struct drm_crtc *crtc = encoder->crtc;
-	struct intel_crt *crt =
-		intel_encoder_to_crt(to_intel_encoder(encoder));
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_device *dev = encoder->base.dev;
+	struct intel_crt *crt = intel_encoder_to_crt(encoder);
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_display_mode *adjusted_mode = &crtc->config.adjusted_mode;
 	u32 adpa;
 
 	if (HAS_PCH_SPLIT(dev))
@@ -265,14 +261,14 @@
 	if (HAS_PCH_LPT(dev))
 		; /* Those bits don't exist here */
 	else if (HAS_PCH_CPT(dev))
-		adpa |= PORT_TRANS_SEL_CPT(intel_crtc->pipe);
-	else if (intel_crtc->pipe == 0)
+		adpa |= PORT_TRANS_SEL_CPT(crtc->pipe);
+	else if (crtc->pipe == 0)
 		adpa |= ADPA_PIPE_A_SELECT;
 	else
 		adpa |= ADPA_PIPE_B_SELECT;
 
 	if (!HAS_PCH_SPLIT(dev))
-		I915_WRITE(BCLRPAT(intel_crtc->pipe), 0);
+		I915_WRITE(BCLRPAT(crtc->pipe), 0);
 
 	I915_WRITE(crt->adpa_reg, adpa);
 }
@@ -613,6 +609,10 @@
 	enum drm_connector_status status;
 	struct intel_load_detect_pipe tmp;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
+		      connector->base.id, drm_get_connector_name(connector),
+		      force);
+
 	if (I915_HAS_HOTPLUG(dev)) {
 		/* We can not rely on the HPD pin always being correctly wired
 		 * up, for example many KVM do not pass it through, and so
@@ -707,10 +707,6 @@
  * Routines for controlling stuff on the analog port
  */
 
-static const struct drm_encoder_helper_funcs crt_encoder_funcs = {
-	.mode_set = intel_crt_mode_set,
-};
-
 static const struct drm_connector_funcs intel_crt_connector_funcs = {
 	.reset = intel_crt_reset,
 	.dpms = intel_crt_dpms,
@@ -800,6 +796,7 @@
 		crt->adpa_reg = ADPA;
 
 	crt->base.compute_config = intel_crt_compute_config;
+	crt->base.mode_set = intel_crt_mode_set;
 	crt->base.disable = intel_disable_crt;
 	crt->base.enable = intel_enable_crt;
 	crt->base.get_config = intel_crt_get_config;
@@ -811,7 +808,6 @@
 		crt->base.get_hw_state = intel_crt_get_hw_state;
 	intel_connector->get_hw_state = intel_connector_get_hw_state;
 
-	drm_encoder_helper_add(&crt->base.base, &crt_encoder_funcs);
 	drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs);
 
 	drm_sysfs_connector_add(connector);
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 324211a..63aca49 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -84,25 +84,17 @@
  * in either FDI or DP modes only, as HDMI connections will work with both
  * of those
  */
-static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port,
-				      bool use_fdi_mode)
+static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 reg;
 	int i;
-	const u32 *ddi_translations = ((use_fdi_mode) ?
+	const u32 *ddi_translations = (port == PORT_E) ?
 		hsw_ddi_translations_fdi :
-		hsw_ddi_translations_dp);
+		hsw_ddi_translations_dp;
 
-	DRM_DEBUG_DRIVER("Initializing DDI buffers for port %c in %s mode\n",
-			port_name(port),
-			use_fdi_mode ? "FDI" : "DP");
-
-	WARN((use_fdi_mode && (port != PORT_E)),
-		"Programming port %c in FDI mode, this probably will not work.\n",
-		port_name(port));
-
-	for (i=0, reg=DDI_BUF_TRANS(port); i < ARRAY_SIZE(hsw_ddi_translations_fdi); i++) {
+	for (i = 0, reg = DDI_BUF_TRANS(port);
+	     i < ARRAY_SIZE(hsw_ddi_translations_fdi); i++) {
 		I915_WRITE(reg, ddi_translations[i]);
 		reg += 4;
 	}
@@ -118,14 +110,8 @@
 	if (!HAS_DDI(dev))
 		return;
 
-	for (port = PORT_A; port < PORT_E; port++)
-		intel_prepare_ddi_buffers(dev, port, false);
-
-	/* DDI E is the suggested one to work in FDI mode, so program is as such
-	 * by default. It will have to be re-programmed in case a digital DP
-	 * output will be detected on it
-	 */
-	intel_prepare_ddi_buffers(dev, PORT_E, true);
+	for (port = PORT_A; port <= PORT_E; port++)
+		intel_prepare_ddi_buffers(dev, port);
 }
 
 static const long hsw_ddi_buf_ctl_values[] = {
@@ -281,43 +267,40 @@
 	DRM_ERROR("FDI link training failed!\n");
 }
 
-static void intel_ddi_mode_set(struct drm_encoder *encoder,
-			       struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted_mode)
+static void intel_ddi_mode_set(struct intel_encoder *encoder)
 {
-	struct drm_crtc *crtc = encoder->crtc;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
-	int port = intel_ddi_get_encoder_port(intel_encoder);
-	int pipe = intel_crtc->pipe;
-	int type = intel_encoder->type;
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+	int port = intel_ddi_get_encoder_port(encoder);
+	int pipe = crtc->pipe;
+	int type = encoder->type;
+	struct drm_display_mode *adjusted_mode = &crtc->config.adjusted_mode;
 
 	DRM_DEBUG_KMS("Preparing DDI mode on port %c, pipe %c\n",
 		      port_name(port), pipe_name(pipe));
 
-	intel_crtc->eld_vld = false;
+	crtc->eld_vld = false;
 	if (type == INTEL_OUTPUT_DISPLAYPORT || type == INTEL_OUTPUT_EDP) {
-		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+		struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
 		struct intel_digital_port *intel_dig_port =
-			enc_to_dig_port(encoder);
+			enc_to_dig_port(&encoder->base);
 
-		intel_dp->DP = intel_dig_port->port_reversal |
+		intel_dp->DP = intel_dig_port->saved_port_bits |
 			       DDI_BUF_CTL_ENABLE | DDI_BUF_EMP_400MV_0DB_HSW;
 		intel_dp->DP |= DDI_PORT_WIDTH(intel_dp->lane_count);
 
 		if (intel_dp->has_audio) {
 			DRM_DEBUG_DRIVER("DP audio on pipe %c on DDI\n",
-					 pipe_name(intel_crtc->pipe));
+					 pipe_name(crtc->pipe));
 
 			/* write eld */
 			DRM_DEBUG_DRIVER("DP audio: write eld information\n");
-			intel_write_eld(encoder, adjusted_mode);
+			intel_write_eld(&encoder->base, adjusted_mode);
 		}
 
 		intel_dp_init_link_config(intel_dp);
 
 	} else if (type == INTEL_OUTPUT_HDMI) {
-		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+		struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
 
 		if (intel_hdmi->has_audio) {
 			/* Proper support for digital audio needs a new logic
@@ -325,14 +308,14 @@
 			 * patch bombing.
 			 */
 			DRM_DEBUG_DRIVER("HDMI audio on pipe %c on DDI\n",
-					 pipe_name(intel_crtc->pipe));
+					 pipe_name(crtc->pipe));
 
 			/* write eld */
 			DRM_DEBUG_DRIVER("HDMI audio: write eld information\n");
-			intel_write_eld(encoder, adjusted_mode);
+			intel_write_eld(&encoder->base, adjusted_mode);
 		}
 
-		intel_hdmi->set_infoframes(encoder, adjusted_mode);
+		intel_hdmi->set_infoframes(&encoder->base, adjusted_mode);
 	}
 }
 
@@ -1109,7 +1092,8 @@
 		 * enabling the port.
 		 */
 		I915_WRITE(DDI_BUF_CTL(port),
-			   intel_dig_port->port_reversal | DDI_BUF_CTL_ENABLE);
+			   intel_dig_port->saved_port_bits |
+			   DDI_BUF_CTL_ENABLE);
 	} else if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
@@ -1117,6 +1101,7 @@
 			intel_dp_stop_link_train(intel_dp);
 
 		ironlake_edp_backlight_on(intel_dp);
+		intel_edp_psr_enable(intel_dp);
 	}
 
 	if (intel_crtc->eld_vld && type != INTEL_OUTPUT_EDP) {
@@ -1147,16 +1132,20 @@
 	if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
+		intel_edp_psr_disable(intel_dp);
 		ironlake_edp_backlight_off(intel_dp);
 	}
 }
 
 int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv)
 {
-	if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT)
+	uint32_t lcpll = I915_READ(LCPLL_CTL);
+
+	if (lcpll & LCPLL_CD_SOURCE_FCLK)
+		return 800000;
+	else if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT)
 		return 450000;
-	else if ((I915_READ(LCPLL_CTL) & LCPLL_CLK_FREQ_MASK) ==
-		 LCPLL_CLK_FREQ_450)
+	else if ((lcpll & LCPLL_CLK_FREQ_MASK) == LCPLL_CLK_FREQ_450)
 		return 450000;
 	else if (IS_ULT(dev_priv->dev))
 		return 337500;
@@ -1308,10 +1297,6 @@
 	.destroy = intel_ddi_destroy,
 };
 
-static const struct drm_encoder_helper_funcs intel_ddi_helper_funcs = {
-	.mode_set = intel_ddi_mode_set,
-};
-
 void intel_ddi_init(struct drm_device *dev, enum port port)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1336,9 +1321,9 @@
 
 	drm_encoder_init(dev, encoder, &intel_ddi_funcs,
 			 DRM_MODE_ENCODER_TMDS);
-	drm_encoder_helper_add(encoder, &intel_ddi_helper_funcs);
 
 	intel_encoder->compute_config = intel_ddi_compute_config;
+	intel_encoder->mode_set = intel_ddi_mode_set;
 	intel_encoder->enable = intel_enable_ddi;
 	intel_encoder->pre_enable = intel_ddi_pre_enable;
 	intel_encoder->disable = intel_disable_ddi;
@@ -1347,8 +1332,9 @@
 	intel_encoder->get_config = intel_ddi_get_config;
 
 	intel_dig_port->port = port;
-	intel_dig_port->port_reversal = I915_READ(DDI_BUF_CTL(port)) &
-					DDI_BUF_PORT_REVERSAL;
+	intel_dig_port->saved_port_bits = I915_READ(DDI_BUF_CTL(port)) &
+					  (DDI_BUF_PORT_REVERSAL |
+					   DDI_A_4_LANES);
 	intel_dig_port->dp.output_reg = DDI_BUF_CTL(port);
 
 	intel_encoder->type = INTEL_OUTPUT_UNKNOWN;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 85f3eb7..38452d8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -45,6 +45,15 @@
 static void intel_increase_pllclock(struct drm_crtc *crtc);
 static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
 
+static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
+				struct intel_crtc_config *pipe_config);
+static void ironlake_crtc_clock_get(struct intel_crtc *crtc,
+				    struct intel_crtc_config *pipe_config);
+
+static int intel_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode,
+			  int x, int y, struct drm_framebuffer *old_fb);
+
+
 typedef struct {
 	int	min, max;
 } intel_range_t;
@@ -54,7 +63,6 @@
 	int	p2_slow, p2_fast;
 } intel_p2_t;
 
-#define INTEL_P2_NUM		      2
 typedef struct intel_limit intel_limit_t;
 struct intel_limit {
 	intel_range_t   dot, vco, n, m, m1, m2, p, p1;
@@ -84,7 +92,7 @@
 		return 27;
 }
 
-static const intel_limit_t intel_limits_i8xx_dvo = {
+static const intel_limit_t intel_limits_i8xx_dac = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 930000, .max = 1400000 },
 	.n = { .min = 3, .max = 16 },
@@ -97,6 +105,19 @@
 		.p2_slow = 4, .p2_fast = 2 },
 };
 
+static const intel_limit_t intel_limits_i8xx_dvo = {
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 930000, .max = 1400000 },
+	.n = { .min = 3, .max = 16 },
+	.m = { .min = 96, .max = 140 },
+	.m1 = { .min = 18, .max = 26 },
+	.m2 = { .min = 6, .max = 16 },
+	.p = { .min = 4, .max = 128 },
+	.p1 = { .min = 2, .max = 33 },
+	.p2 = { .dot_limit = 165000,
+		.p2_slow = 4, .p2_fast = 4 },
+};
+
 static const intel_limit_t intel_limits_i8xx_lvds = {
 	.dot = { .min = 25000, .max = 350000 },
 	.vco = { .min = 930000, .max = 1400000 },
@@ -405,8 +426,10 @@
 	} else {
 		if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS))
 			limit = &intel_limits_i8xx_lvds;
-		else
+		else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DVO))
 			limit = &intel_limits_i8xx_dvo;
+		else
+			limit = &intel_limits_i8xx_dac;
 	}
 	return limit;
 }
@@ -667,7 +690,7 @@
 {
 	u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2;
 	u32 m, n, fastclk;
-	u32 updrate, minupdate, fracbits, p;
+	u32 updrate, minupdate, p;
 	unsigned long bestppm, ppm, absppm;
 	int dotclk, flag;
 
@@ -678,7 +701,6 @@
 	fastclk = dotclk / (2*100);
 	updrate = 0;
 	minupdate = 19200;
-	fracbits = 1;
 	n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0;
 	bestm1 = bestm2 = bestp1 = bestp2 = 0;
 
@@ -892,8 +914,8 @@
 }
 
 /* Only for pre-ILK configs */
-static void assert_pll(struct drm_i915_private *dev_priv,
-		       enum pipe pipe, bool state)
+void assert_pll(struct drm_i915_private *dev_priv,
+		enum pipe pipe, bool state)
 {
 	int reg;
 	u32 val;
@@ -906,10 +928,8 @@
 	     "PLL state assertion failure (expected %s, current %s)\n",
 	     state_string(state), state_string(cur_state));
 }
-#define assert_pll_enabled(d, p) assert_pll(d, p, true)
-#define assert_pll_disabled(d, p) assert_pll(d, p, false)
 
-static struct intel_shared_dpll *
+struct intel_shared_dpll *
 intel_crtc_to_shared_dpll(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
@@ -921,9 +941,9 @@
 }
 
 /* For ILK+ */
-static void assert_shared_dpll(struct drm_i915_private *dev_priv,
-			       struct intel_shared_dpll *pll,
-			       bool state)
+void assert_shared_dpll(struct drm_i915_private *dev_priv,
+			struct intel_shared_dpll *pll,
+			bool state)
 {
 	bool cur_state;
 	struct intel_dpll_hw_state hw_state;
@@ -942,8 +962,6 @@
 	     "%s assertion failure (expected %s, current %s)\n",
 	     pll->name, state_string(state), state_string(cur_state));
 }
-#define assert_shared_dpll_enabled(d, p) assert_shared_dpll(d, p, true)
-#define assert_shared_dpll_disabled(d, p) assert_shared_dpll(d, p, false)
 
 static void assert_fdi_tx(struct drm_i915_private *dev_priv,
 			  enum pipe pipe, bool state)
@@ -1007,15 +1025,19 @@
 	WARN(!(val & FDI_TX_PLL_ENABLE), "FDI TX PLL assertion failure, should be active but is disabled\n");
 }
 
-static void assert_fdi_rx_pll_enabled(struct drm_i915_private *dev_priv,
-				      enum pipe pipe)
+void assert_fdi_rx_pll(struct drm_i915_private *dev_priv,
+		       enum pipe pipe, bool state)
 {
 	int reg;
 	u32 val;
+	bool cur_state;
 
 	reg = FDI_RX_CTL(pipe);
 	val = I915_READ(reg);
-	WARN(!(val & FDI_RX_PLL_ENABLE), "FDI RX PLL assertion failure, should be active but is disabled\n");
+	cur_state = !!(val & FDI_RX_PLL_ENABLE);
+	WARN(cur_state != state,
+	     "FDI RX PLL assertion failure (expected %s, current %s)\n",
+	     state_string(state), state_string(cur_state));
 }
 
 static void assert_panel_unlocked(struct drm_i915_private *dev_priv,
@@ -1111,7 +1133,7 @@
 	}
 
 	/* Need to check both planes against the pipe */
-	for (i = 0; i < INTEL_INFO(dev)->num_pipes; i++) {
+	for_each_pipe(i) {
 		reg = DSPCNTR(i);
 		val = I915_READ(reg);
 		cur_pipe = (val & DISPPLANE_SEL_PIPE_MASK) >>
@@ -1301,51 +1323,92 @@
 	assert_pch_hdmi_disabled(dev_priv, pipe, PCH_HDMID);
 }
 
-/**
- * intel_enable_pll - enable a PLL
- * @dev_priv: i915 private structure
- * @pipe: pipe PLL to enable
- *
- * Enable @pipe's PLL so we can start pumping pixels from a plane.  Check to
- * make sure the PLL reg is writable first though, since the panel write
- * protect mechanism may be enabled.
- *
- * Note!  This is for pre-ILK only.
- *
- * Unfortunately needed by dvo_ns2501 since the dvo depends on it running.
- */
-static void intel_enable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
+static void vlv_enable_pll(struct intel_crtc *crtc)
 {
-	int reg;
-	u32 val;
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int reg = DPLL(crtc->pipe);
+	u32 dpll = crtc->config.dpll_hw_state.dpll;
 
-	assert_pipe_disabled(dev_priv, pipe);
+	assert_pipe_disabled(dev_priv, crtc->pipe);
 
 	/* No really, not for ILK+ */
-	BUG_ON(!IS_VALLEYVIEW(dev_priv->dev) && dev_priv->info->gen >= 5);
+	BUG_ON(!IS_VALLEYVIEW(dev_priv->dev));
 
 	/* PLL is protected by panel, make sure we can write it */
 	if (IS_MOBILE(dev_priv->dev) && !IS_I830(dev_priv->dev))
-		assert_panel_unlocked(dev_priv, pipe);
+		assert_panel_unlocked(dev_priv, crtc->pipe);
 
-	reg = DPLL(pipe);
-	val = I915_READ(reg);
-	val |= DPLL_VCO_ENABLE;
+	I915_WRITE(reg, dpll);
+	POSTING_READ(reg);
+	udelay(150);
+
+	if (wait_for(((I915_READ(reg) & DPLL_LOCK_VLV) == DPLL_LOCK_VLV), 1))
+		DRM_ERROR("DPLL %d failed to lock\n", crtc->pipe);
+
+	I915_WRITE(DPLL_MD(crtc->pipe), crtc->config.dpll_hw_state.dpll_md);
+	POSTING_READ(DPLL_MD(crtc->pipe));
 
 	/* We do this three times for luck */
-	I915_WRITE(reg, val);
+	I915_WRITE(reg, dpll);
 	POSTING_READ(reg);
 	udelay(150); /* wait for warmup */
-	I915_WRITE(reg, val);
+	I915_WRITE(reg, dpll);
 	POSTING_READ(reg);
 	udelay(150); /* wait for warmup */
-	I915_WRITE(reg, val);
+	I915_WRITE(reg, dpll);
+	POSTING_READ(reg);
+	udelay(150); /* wait for warmup */
+}
+
+static void i9xx_enable_pll(struct intel_crtc *crtc)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int reg = DPLL(crtc->pipe);
+	u32 dpll = crtc->config.dpll_hw_state.dpll;
+
+	assert_pipe_disabled(dev_priv, crtc->pipe);
+
+	/* No really, not for ILK+ */
+	BUG_ON(dev_priv->info->gen >= 5);
+
+	/* PLL is protected by panel, make sure we can write it */
+	if (IS_MOBILE(dev) && !IS_I830(dev))
+		assert_panel_unlocked(dev_priv, crtc->pipe);
+
+	I915_WRITE(reg, dpll);
+
+	/* Wait for the clocks to stabilize. */
+	POSTING_READ(reg);
+	udelay(150);
+
+	if (INTEL_INFO(dev)->gen >= 4) {
+		I915_WRITE(DPLL_MD(crtc->pipe),
+			   crtc->config.dpll_hw_state.dpll_md);
+	} else {
+		/* The pixel multiplier can only be updated once the
+		 * DPLL is enabled and the clocks are stable.
+		 *
+		 * So write it again.
+		 */
+		I915_WRITE(reg, dpll);
+	}
+
+	/* We do this three times for luck */
+	I915_WRITE(reg, dpll);
+	POSTING_READ(reg);
+	udelay(150); /* wait for warmup */
+	I915_WRITE(reg, dpll);
+	POSTING_READ(reg);
+	udelay(150); /* wait for warmup */
+	I915_WRITE(reg, dpll);
 	POSTING_READ(reg);
 	udelay(150); /* wait for warmup */
 }
 
 /**
- * intel_disable_pll - disable a PLL
+ * i9xx_disable_pll - disable a PLL
  * @dev_priv: i915 private structure
  * @pipe: pipe PLL to disable
  *
@@ -1353,11 +1416,8 @@
  *
  * Note!  This is for pre-ILK only.
  */
-static void intel_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
+static void i9xx_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 {
-	int reg;
-	u32 val;
-
 	/* Don't disable pipe A or pipe A PLLs if needed */
 	if (pipe == PIPE_A && (dev_priv->quirks & QUIRK_PIPEA_FORCE))
 		return;
@@ -1365,11 +1425,8 @@
 	/* Make sure the pipe isn't still relying on us */
 	assert_pipe_disabled(dev_priv, pipe);
 
-	reg = DPLL(pipe);
-	val = I915_READ(reg);
-	val &= ~DPLL_VCO_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	I915_WRITE(DPLL(pipe), 0);
+	POSTING_READ(DPLL(pipe));
 }
 
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv, int port)
@@ -1819,7 +1876,7 @@
 	return 0;
 
 err_unpin:
-	i915_gem_object_unpin(obj);
+	i915_gem_object_unpin_from_display_plane(obj);
 err_interruptible:
 	dev_priv->mm.interruptible = true;
 	return ret;
@@ -1828,7 +1885,7 @@
 void intel_unpin_fb_obj(struct drm_i915_gem_object *obj)
 {
 	i915_gem_object_unpin_fence(obj);
-	i915_gem_object_unpin(obj);
+	i915_gem_object_unpin_from_display_plane(obj);
 }
 
 /* Computes the linear offset to the base tile and adjusts x, y. bytes per pixel
@@ -1942,16 +1999,17 @@
 		intel_crtc->dspaddr_offset = linear_offset;
 	}
 
-	DRM_DEBUG_KMS("Writing base %08X %08lX %d %d %d\n",
-		      obj->gtt_offset, linear_offset, x, y, fb->pitches[0]);
+	DRM_DEBUG_KMS("Writing base %08lX %08lX %d %d %d\n",
+		      i915_gem_obj_ggtt_offset(obj), linear_offset, x, y,
+		      fb->pitches[0]);
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	if (INTEL_INFO(dev)->gen >= 4) {
 		I915_MODIFY_DISPBASE(DSPSURF(plane),
-				     obj->gtt_offset + intel_crtc->dspaddr_offset);
+				     i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 		I915_WRITE(DSPTILEOFF(plane), (y << 16) | x);
 		I915_WRITE(DSPLINOFF(plane), linear_offset);
 	} else
-		I915_WRITE(DSPADDR(plane), obj->gtt_offset + linear_offset);
+		I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset);
 	POSTING_READ(reg);
 
 	return 0;
@@ -2031,11 +2089,12 @@
 					       fb->pitches[0]);
 	linear_offset -= intel_crtc->dspaddr_offset;
 
-	DRM_DEBUG_KMS("Writing base %08X %08lX %d %d %d\n",
-		      obj->gtt_offset, linear_offset, x, y, fb->pitches[0]);
+	DRM_DEBUG_KMS("Writing base %08lX %08lX %d %d %d\n",
+		      i915_gem_obj_ggtt_offset(obj), linear_offset, x, y,
+		      fb->pitches[0]);
 	I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]);
 	I915_MODIFY_DISPBASE(DSPSURF(plane),
-			     obj->gtt_offset + intel_crtc->dspaddr_offset);
+			     i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 	if (IS_HASWELL(dev)) {
 		I915_WRITE(DSPOFFSET(plane), (y << 16) | x);
 	} else {
@@ -2183,6 +2242,20 @@
 		return ret;
 	}
 
+	/* Update pipe size and adjust fitter if needed */
+	if (i915_fastboot) {
+		I915_WRITE(PIPESRC(intel_crtc->pipe),
+			   ((crtc->mode.hdisplay - 1) << 16) |
+			   (crtc->mode.vdisplay - 1));
+		if (!intel_crtc->config.pch_pfit.size &&
+		    (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS) ||
+		     intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))) {
+			I915_WRITE(PF_CTL(intel_crtc->pipe), 0);
+			I915_WRITE(PF_WIN_POS(intel_crtc->pipe), 0);
+			I915_WRITE(PF_WIN_SZ(intel_crtc->pipe), 0);
+		}
+	}
+
 	ret = dev_priv->display.update_plane(crtc, fb, x, y);
 	if (ret) {
 		intel_unpin_fb_obj(to_intel_framebuffer(fb)->obj);
@@ -2203,6 +2276,7 @@
 	}
 
 	intel_update_fbc(dev);
+	intel_edp_psr_update(dev);
 	mutex_unlock(&dev->struct_mutex);
 
 	intel_crtc_update_sarea_pos(crtc, x, y);
@@ -2523,7 +2597,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp, i;
+	u32 reg, temp, i, j;
 
 	/* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit
 	   for train result */
@@ -2539,97 +2613,99 @@
 	DRM_DEBUG_KMS("FDI_RX_IIR before link train 0x%x\n",
 		      I915_READ(FDI_RX_IIR(pipe)));
 
-	/* enable CPU FDI TX and PCH FDI RX */
-	reg = FDI_TX_CTL(pipe);
-	temp = I915_READ(reg);
-	temp &= ~FDI_DP_PORT_WIDTH_MASK;
-	temp |= FDI_DP_PORT_WIDTH(intel_crtc->config.fdi_lanes);
-	temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB);
-	temp |= FDI_LINK_TRAIN_PATTERN_1_IVB;
-	temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
-	temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B;
-	temp |= FDI_COMPOSITE_SYNC;
-	I915_WRITE(reg, temp | FDI_TX_ENABLE);
-
-	I915_WRITE(FDI_RX_MISC(pipe),
-		   FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90);
-
-	reg = FDI_RX_CTL(pipe);
-	temp = I915_READ(reg);
-	temp &= ~FDI_LINK_TRAIN_AUTO;
-	temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
-	temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
-	temp |= FDI_COMPOSITE_SYNC;
-	I915_WRITE(reg, temp | FDI_RX_ENABLE);
-
-	POSTING_READ(reg);
-	udelay(150);
-
-	for (i = 0; i < 4; i++) {
+	/* Try each vswing and preemphasis setting twice before moving on */
+	for (j = 0; j < ARRAY_SIZE(snb_b_fdi_train_param) * 2; j++) {
+		/* disable first in case we need to retry */
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
+		temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB);
+		temp &= ~FDI_TX_ENABLE;
+		I915_WRITE(reg, temp);
+
+		reg = FDI_RX_CTL(pipe);
+		temp = I915_READ(reg);
+		temp &= ~FDI_LINK_TRAIN_AUTO;
+		temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
+		temp &= ~FDI_RX_ENABLE;
+		I915_WRITE(reg, temp);
+
+		/* enable CPU FDI TX and PCH FDI RX */
+		reg = FDI_TX_CTL(pipe);
+		temp = I915_READ(reg);
+		temp &= ~FDI_DP_PORT_WIDTH_MASK;
+		temp |= FDI_DP_PORT_WIDTH(intel_crtc->config.fdi_lanes);
+		temp |= FDI_LINK_TRAIN_PATTERN_1_IVB;
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
-		temp |= snb_b_fdi_train_param[i];
+		temp |= snb_b_fdi_train_param[j/2];
+		temp |= FDI_COMPOSITE_SYNC;
+		I915_WRITE(reg, temp | FDI_TX_ENABLE);
+
+		I915_WRITE(FDI_RX_MISC(pipe),
+			   FDI_RX_TP1_TO_TP2_48 | FDI_RX_FDI_DELAY_90);
+
+		reg = FDI_RX_CTL(pipe);
+		temp = I915_READ(reg);
+		temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
+		temp |= FDI_COMPOSITE_SYNC;
+		I915_WRITE(reg, temp | FDI_RX_ENABLE);
+
+		POSTING_READ(reg);
+		udelay(1); /* should be 0.5us */
+
+		for (i = 0; i < 4; i++) {
+			reg = FDI_RX_IIR(pipe);
+			temp = I915_READ(reg);
+			DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
+
+			if (temp & FDI_RX_BIT_LOCK ||
+			    (I915_READ(reg) & FDI_RX_BIT_LOCK)) {
+				I915_WRITE(reg, temp | FDI_RX_BIT_LOCK);
+				DRM_DEBUG_KMS("FDI train 1 done, level %i.\n",
+					      i);
+				break;
+			}
+			udelay(1); /* should be 0.5us */
+		}
+		if (i == 4) {
+			DRM_DEBUG_KMS("FDI train 1 fail on vswing %d\n", j / 2);
+			continue;
+		}
+
+		/* Train 2 */
+		reg = FDI_TX_CTL(pipe);
+		temp = I915_READ(reg);
+		temp &= ~FDI_LINK_TRAIN_NONE_IVB;
+		temp |= FDI_LINK_TRAIN_PATTERN_2_IVB;
+		I915_WRITE(reg, temp);
+
+		reg = FDI_RX_CTL(pipe);
+		temp = I915_READ(reg);
+		temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
+		temp |= FDI_LINK_TRAIN_PATTERN_2_CPT;
 		I915_WRITE(reg, temp);
 
 		POSTING_READ(reg);
-		udelay(500);
+		udelay(2); /* should be 1.5us */
 
-		reg = FDI_RX_IIR(pipe);
-		temp = I915_READ(reg);
-		DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
+		for (i = 0; i < 4; i++) {
+			reg = FDI_RX_IIR(pipe);
+			temp = I915_READ(reg);
+			DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
 
-		if (temp & FDI_RX_BIT_LOCK ||
-		    (I915_READ(reg) & FDI_RX_BIT_LOCK)) {
-			I915_WRITE(reg, temp | FDI_RX_BIT_LOCK);
-			DRM_DEBUG_KMS("FDI train 1 done, level %i.\n", i);
-			break;
+			if (temp & FDI_RX_SYMBOL_LOCK ||
+			    (I915_READ(reg) & FDI_RX_SYMBOL_LOCK)) {
+				I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK);
+				DRM_DEBUG_KMS("FDI train 2 done, level %i.\n",
+					      i);
+				goto train_done;
+			}
+			udelay(2); /* should be 1.5us */
 		}
+		if (i == 4)
+			DRM_DEBUG_KMS("FDI train 2 fail on vswing %d\n", j / 2);
 	}
-	if (i == 4)
-		DRM_ERROR("FDI train 1 fail!\n");
 
-	/* Train 2 */
-	reg = FDI_TX_CTL(pipe);
-	temp = I915_READ(reg);
-	temp &= ~FDI_LINK_TRAIN_NONE_IVB;
-	temp |= FDI_LINK_TRAIN_PATTERN_2_IVB;
-	temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
-	temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B;
-	I915_WRITE(reg, temp);
-
-	reg = FDI_RX_CTL(pipe);
-	temp = I915_READ(reg);
-	temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
-	temp |= FDI_LINK_TRAIN_PATTERN_2_CPT;
-	I915_WRITE(reg, temp);
-
-	POSTING_READ(reg);
-	udelay(150);
-
-	for (i = 0; i < 4; i++) {
-		reg = FDI_TX_CTL(pipe);
-		temp = I915_READ(reg);
-		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
-		temp |= snb_b_fdi_train_param[i];
-		I915_WRITE(reg, temp);
-
-		POSTING_READ(reg);
-		udelay(500);
-
-		reg = FDI_RX_IIR(pipe);
-		temp = I915_READ(reg);
-		DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
-
-		if (temp & FDI_RX_SYMBOL_LOCK) {
-			I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK);
-			DRM_DEBUG_KMS("FDI train 2 done, level %i.\n", i);
-			break;
-		}
-	}
-	if (i == 4)
-		DRM_ERROR("FDI train 2 fail!\n");
-
+train_done:
 	DRM_DEBUG_KMS("FDI train done.\n");
 }
 
@@ -2927,15 +3003,8 @@
 	/* For PCH output, training FDI link */
 	dev_priv->display.fdi_link_train(crtc);
 
-	/* XXX: pch pll's can be enabled any time before we enable the PCH
-	 * transcoder, and we actually should do this to not upset any PCH
-	 * transcoder that already use the clock when we share it.
-	 *
-	 * Note that enable_shared_dpll tries to do the right thing, but
-	 * get_shared_dpll unconditionally resets the pll - we need that to have
-	 * the right LVDS enable sequence. */
-	ironlake_enable_shared_dpll(intel_crtc);
-
+	/* We need to program the right clock selection before writing the pixel
+	 * mutliplier into the DPLL. */
 	if (HAS_PCH_CPT(dev)) {
 		u32 sel;
 
@@ -2949,6 +3018,15 @@
 		I915_WRITE(PCH_DPLL_SEL, temp);
 	}
 
+	/* XXX: pch pll's can be enabled any time before we enable the PCH
+	 * transcoder, and we actually should do this to not upset any PCH
+	 * transcoder that already use the clock when we share it.
+	 *
+	 * Note that enable_shared_dpll tries to do the right thing, but
+	 * get_shared_dpll unconditionally resets the pll - we need that to have
+	 * the right LVDS enable sequence. */
+	ironlake_enable_shared_dpll(intel_crtc);
+
 	/* set transcoder timing, panel must allow it */
 	assert_panel_unlocked(dev_priv, pipe);
 	ironlake_pch_transcoder_set_timings(intel_crtc, pipe);
@@ -3031,7 +3109,7 @@
 	crtc->config.shared_dpll = DPLL_ID_PRIVATE;
 }
 
-static struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc, u32 dpll, u32 fp)
+static struct intel_shared_dpll *intel_get_shared_dpll(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_shared_dpll *pll = intel_crtc_to_shared_dpll(crtc);
@@ -3045,7 +3123,7 @@
 
 	if (HAS_PCH_IBX(dev_priv->dev)) {
 		/* Ironlake PCH has a fixed PLL->PCH pipe mapping. */
-		i = crtc->pipe;
+		i = (enum intel_dpll_id) crtc->pipe;
 		pll = &dev_priv->shared_dplls[i];
 
 		DRM_DEBUG_KMS("CRTC:%d using pre-allocated %s\n",
@@ -3061,8 +3139,8 @@
 		if (pll->refcount == 0)
 			continue;
 
-		if (dpll == (I915_READ(PCH_DPLL(pll->id)) & 0x7fffffff) &&
-		    fp == I915_READ(PCH_FP0(pll->id))) {
+		if (memcmp(&crtc->config.dpll_hw_state, &pll->hw_state,
+			   sizeof(pll->hw_state)) == 0) {
 			DRM_DEBUG_KMS("CRTC:%d sharing existing %s (refcount %d, ative %d)\n",
 				      crtc->base.base.id,
 				      pll->name, pll->refcount, pll->active);
@@ -3096,13 +3174,7 @@
 		WARN_ON(pll->on);
 		assert_shared_dpll_disabled(dev_priv, pll);
 
-		/* Wait for the clocks to stabilize before rewriting the regs */
-		I915_WRITE(PCH_DPLL(pll->id), dpll & ~DPLL_VCO_ENABLE);
-		POSTING_READ(PCH_DPLL(pll->id));
-		udelay(150);
-
-		I915_WRITE(PCH_FP0(pll->id), fp);
-		I915_WRITE(PCH_DPLL(pll->id), dpll & ~DPLL_VCO_ENABLE);
+		pll->mode_set(dev_priv, pll);
 	}
 	pll->refcount++;
 
@@ -3174,7 +3246,6 @@
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
 	int plane = intel_crtc->plane;
-	u32 temp;
 
 	WARN_ON(!crtc->enabled);
 
@@ -3188,12 +3259,9 @@
 
 	intel_update_watermarks(dev);
 
-	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_LVDS)) {
-		temp = I915_READ(PCH_LVDS);
-		if ((temp & LVDS_PORT_EN) == 0)
-			I915_WRITE(PCH_LVDS, temp | LVDS_PORT_EN);
-	}
-
+	for_each_encoder_on_crtc(dev, crtc, encoder)
+		if (encoder->pre_enable)
+			encoder->pre_enable(encoder);
 
 	if (intel_crtc->config.has_pch_encoder) {
 		/* Note: FDI PLL enabling _must_ be done before we enable the
@@ -3205,10 +3273,6 @@
 		assert_fdi_rx_disabled(dev_priv, pipe);
 	}
 
-	for_each_encoder_on_crtc(dev, crtc, encoder)
-		if (encoder->pre_enable)
-			encoder->pre_enable(encoder);
-
 	ironlake_pfit_enable(intel_crtc);
 
 	/*
@@ -3389,7 +3453,7 @@
 	intel_crtc_wait_for_pending_flips(crtc);
 	drm_vblank_off(dev, pipe);
 
-	if (dev_priv->cfb_plane == plane)
+	if (dev_priv->fbc.plane == plane)
 		intel_disable_fbc(dev);
 
 	intel_crtc_update_cursor(crtc, false);
@@ -3462,7 +3526,7 @@
 	drm_vblank_off(dev, pipe);
 
 	/* FBC must be disabled before disabling the plane on HSW. */
-	if (dev_priv->cfb_plane == plane)
+	if (dev_priv->fbc.plane == plane)
 		intel_disable_fbc(dev);
 
 	hsw_disable_ips(intel_crtc);
@@ -3593,22 +3657,16 @@
 	intel_crtc->active = true;
 	intel_update_watermarks(dev);
 
-	mutex_lock(&dev_priv->dpio_lock);
-
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_pll_enable)
 			encoder->pre_pll_enable(encoder);
 
-	intel_enable_pll(dev_priv, pipe);
+	vlv_enable_pll(intel_crtc);
 
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_enable)
 			encoder->pre_enable(encoder);
 
-	/* VLV wants encoder enabling _before_ the pipe is up. */
-	for_each_encoder_on_crtc(dev, crtc, encoder)
-		encoder->enable(encoder);
-
 	i9xx_pfit_enable(intel_crtc);
 
 	intel_crtc_load_lut(crtc);
@@ -3620,7 +3678,8 @@
 
 	intel_update_fbc(dev);
 
-	mutex_unlock(&dev_priv->dpio_lock);
+	for_each_encoder_on_crtc(dev, crtc, encoder)
+		encoder->enable(encoder);
 }
 
 static void i9xx_crtc_enable(struct drm_crtc *crtc)
@@ -3640,12 +3699,12 @@
 	intel_crtc->active = true;
 	intel_update_watermarks(dev);
 
-	intel_enable_pll(dev_priv, pipe);
-
 	for_each_encoder_on_crtc(dev, crtc, encoder)
 		if (encoder->pre_enable)
 			encoder->pre_enable(encoder);
 
+	i9xx_enable_pll(intel_crtc);
+
 	i9xx_pfit_enable(intel_crtc);
 
 	intel_crtc_load_lut(crtc);
@@ -3701,7 +3760,7 @@
 	intel_crtc_wait_for_pending_flips(crtc);
 	drm_vblank_off(dev, pipe);
 
-	if (dev_priv->cfb_plane == plane)
+	if (dev_priv->fbc.plane == plane)
 		intel_disable_fbc(dev);
 
 	intel_crtc_dpms_overlay(intel_crtc, false);
@@ -3717,7 +3776,7 @@
 		if (encoder->post_disable)
 			encoder->post_disable(encoder);
 
-	intel_disable_pll(dev_priv, pipe);
+	i9xx_disable_pll(dev_priv, pipe);
 
 	intel_crtc->active = false;
 	intel_update_fbc(dev);
@@ -3817,16 +3876,6 @@
 	}
 }
 
-void intel_modeset_disable(struct drm_device *dev)
-{
-	struct drm_crtc *crtc;
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		if (crtc->enabled)
-			intel_crtc_disable(crtc);
-	}
-}
-
 void intel_encoder_destroy(struct drm_encoder *encoder)
 {
 	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
@@ -3835,10 +3884,10 @@
 	kfree(intel_encoder);
 }
 
-/* Simple dpms helper for encodres with just one connector, no cloning and only
+/* Simple dpms helper for encoders with just one connector, no cloning and only
  * one kind of off state. It clamps all !ON modes to fully OFF and changes the
  * state of the entire output pipe. */
-void intel_encoder_dpms(struct intel_encoder *encoder, int mode)
+static void intel_encoder_dpms(struct intel_encoder *encoder, int mode)
 {
 	if (mode == DRM_MODE_DPMS_ON) {
 		encoder->connectors_active = true;
@@ -4032,7 +4081,7 @@
 {
 	pipe_config->ips_enabled = i915_enable_ips &&
 				   hsw_crtc_supports_ips(crtc) &&
-				   pipe_config->pipe_bpp == 24;
+				   pipe_config->pipe_bpp <= 24;
 }
 
 static int intel_crtc_compute_config(struct intel_crtc *crtc,
@@ -4048,12 +4097,6 @@
 			return -EINVAL;
 	}
 
-	/* All interlaced capable intel hw wants timings in frames. Note though
-	 * that intel_lvds_mode_fixup does some funny tricks with the crtc
-	 * timings, so we need to be careful not to clobber these.*/
-	if (!pipe_config->timings_set)
-		drm_mode_set_crtcinfo(adjusted_mode, 0);
-
 	/* Cantiga+ cannot handle modes with a hsync front porch of 0.
 	 * WaPruneModeWithIncorrectHsyncOffset:ctg,elk,ilk,snb,ivb,vlv,hsw.
 	 */
@@ -4103,6 +4146,30 @@
 	return 200000;
 }
 
+static int pnv_get_display_clock_speed(struct drm_device *dev)
+{
+	u16 gcfgc = 0;
+
+	pci_read_config_word(dev->pdev, GCFGC, &gcfgc);
+
+	switch (gcfgc & GC_DISPLAY_CLOCK_MASK) {
+	case GC_DISPLAY_CLOCK_267_MHZ_PNV:
+		return 267000;
+	case GC_DISPLAY_CLOCK_333_MHZ_PNV:
+		return 333000;
+	case GC_DISPLAY_CLOCK_444_MHZ_PNV:
+		return 444000;
+	case GC_DISPLAY_CLOCK_200_MHZ_PNV:
+		return 200000;
+	default:
+		DRM_ERROR("Unknown pnv display core clock 0x%04x\n", gcfgc);
+	case GC_DISPLAY_CLOCK_133_MHZ_PNV:
+		return 133000;
+	case GC_DISPLAY_CLOCK_167_MHZ_PNV:
+		return 167000;
+	}
+}
+
 static int i915gm_get_display_clock_speed(struct drm_device *dev)
 {
 	u16 gcfgc = 0;
@@ -4266,14 +4333,17 @@
 	}
 
 	I915_WRITE(FP0(pipe), fp);
+	crtc->config.dpll_hw_state.fp0 = fp;
 
 	crtc->lowfreq_avail = false;
 	if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_LVDS) &&
 	    reduced_clock && i915_powersave) {
 		I915_WRITE(FP1(pipe), fp2);
+		crtc->config.dpll_hw_state.fp1 = fp2;
 		crtc->lowfreq_avail = true;
 	} else {
 		I915_WRITE(FP1(pipe), fp);
+		crtc->config.dpll_hw_state.fp1 = fp;
 	}
 }
 
@@ -4351,17 +4421,13 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_encoder *encoder;
 	int pipe = crtc->pipe;
 	u32 dpll, mdiv;
 	u32 bestn, bestm1, bestm2, bestp1, bestp2;
-	bool is_hdmi;
 	u32 coreclk, reg_val, dpll_md;
 
 	mutex_lock(&dev_priv->dpio_lock);
 
-	is_hdmi = intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_HDMI);
-
 	bestn = crtc->config.dpll.n;
 	bestm1 = crtc->config.dpll.m1;
 	bestm2 = crtc->config.dpll.m2;
@@ -4407,7 +4473,7 @@
 	    intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_ANALOG) ||
 	    intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_HDMI))
 		vlv_dpio_write(dev_priv, DPIO_LPF_COEFF(pipe),
-				 0x005f0021);
+				 0x009f0003);
 	else
 		vlv_dpio_write(dev_priv, DPIO_LPF_COEFF(pipe),
 				 0x00d0000f);
@@ -4440,10 +4506,6 @@
 
 	vlv_dpio_write(dev_priv, DPIO_PLL_CML(pipe), 0x87871000);
 
-	for_each_encoder_on_crtc(dev, &crtc->base, encoder)
-		if (encoder->pre_pll_enable)
-			encoder->pre_pll_enable(encoder);
-
 	/* Enable DPIO clock input */
 	dpll = DPLL_EXT_BUFFER_ENABLE_VLV | DPLL_REFA_CLK_ENABLE_VLV |
 		DPLL_VGA_MODE_DIS | DPLL_INTEGRATED_CLOCK_VLV;
@@ -4451,17 +4513,11 @@
 		dpll |= DPLL_INTEGRATED_CRI_CLK_VLV;
 
 	dpll |= DPLL_VCO_ENABLE;
-	I915_WRITE(DPLL(pipe), dpll);
-	POSTING_READ(DPLL(pipe));
-	udelay(150);
-
-	if (wait_for(((I915_READ(DPLL(pipe)) & DPLL_LOCK_VLV) == DPLL_LOCK_VLV), 1))
-		DRM_ERROR("DPLL %d failed to lock\n", pipe);
+	crtc->config.dpll_hw_state.dpll = dpll;
 
 	dpll_md = (crtc->config.pixel_multiplier - 1)
 		<< DPLL_MD_UDI_MULTIPLIER_SHIFT;
-	I915_WRITE(DPLL_MD(pipe), dpll_md);
-	POSTING_READ(DPLL_MD(pipe));
+	crtc->config.dpll_hw_state.dpll_md = dpll_md;
 
 	if (crtc->config.has_dp_encoder)
 		intel_dp_set_m_n(crtc);
@@ -4475,8 +4531,6 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_encoder *encoder;
-	int pipe = crtc->pipe;
 	u32 dpll;
 	bool is_sdvo;
 	struct dpll *clock = &crtc->config.dpll;
@@ -4499,10 +4553,10 @@
 	}
 
 	if (is_sdvo)
-		dpll |= DPLL_DVO_HIGH_SPEED;
+		dpll |= DPLL_SDVO_HIGH_SPEED;
 
 	if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_DISPLAYPORT))
-		dpll |= DPLL_DVO_HIGH_SPEED;
+		dpll |= DPLL_SDVO_HIGH_SPEED;
 
 	/* compute bitmask from p1 value */
 	if (IS_PINEVIEW(dev))
@@ -4538,35 +4592,16 @@
 		dpll |= PLL_REF_INPUT_DREFCLK;
 
 	dpll |= DPLL_VCO_ENABLE;
-	I915_WRITE(DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
-	POSTING_READ(DPLL(pipe));
-	udelay(150);
-
-	for_each_encoder_on_crtc(dev, &crtc->base, encoder)
-		if (encoder->pre_pll_enable)
-			encoder->pre_pll_enable(encoder);
-
-	if (crtc->config.has_dp_encoder)
-		intel_dp_set_m_n(crtc);
-
-	I915_WRITE(DPLL(pipe), dpll);
-
-	/* Wait for the clocks to stabilize. */
-	POSTING_READ(DPLL(pipe));
-	udelay(150);
+	crtc->config.dpll_hw_state.dpll = dpll;
 
 	if (INTEL_INFO(dev)->gen >= 4) {
 		u32 dpll_md = (crtc->config.pixel_multiplier - 1)
 			<< DPLL_MD_UDI_MULTIPLIER_SHIFT;
-		I915_WRITE(DPLL_MD(pipe), dpll_md);
-	} else {
-		/* The pixel multiplier can only be updated once the
-		 * DPLL is enabled and the clocks are stable.
-		 *
-		 * So write it again.
-		 */
-		I915_WRITE(DPLL(pipe), dpll);
+		crtc->config.dpll_hw_state.dpll_md = dpll_md;
 	}
+
+	if (crtc->config.has_dp_encoder)
+		intel_dp_set_m_n(crtc);
 }
 
 static void i8xx_update_pll(struct intel_crtc *crtc,
@@ -4575,8 +4610,6 @@
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_encoder *encoder;
-	int pipe = crtc->pipe;
 	u32 dpll;
 	struct dpll *clock = &crtc->config.dpll;
 
@@ -4595,6 +4628,9 @@
 			dpll |= PLL_P2_DIVIDE_BY_4;
 	}
 
+	if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_DVO))
+		dpll |= DPLL_DVO_2X_MODE;
+
 	if (intel_pipe_has_type(&crtc->base, INTEL_OUTPUT_LVDS) &&
 		 intel_panel_use_ssc(dev_priv) && num_connectors < 2)
 		dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN;
@@ -4602,26 +4638,7 @@
 		dpll |= PLL_REF_INPUT_DREFCLK;
 
 	dpll |= DPLL_VCO_ENABLE;
-	I915_WRITE(DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
-	POSTING_READ(DPLL(pipe));
-	udelay(150);
-
-	for_each_encoder_on_crtc(dev, &crtc->base, encoder)
-		if (encoder->pre_pll_enable)
-			encoder->pre_pll_enable(encoder);
-
-	I915_WRITE(DPLL(pipe), dpll);
-
-	/* Wait for the clocks to stabilize. */
-	POSTING_READ(DPLL(pipe));
-	udelay(150);
-
-	/* The pixel multiplier can only be updated once the
-	 * DPLL is enabled and the clocks are stable.
-	 *
-	 * So write it again.
-	 */
-	I915_WRITE(DPLL(pipe), dpll);
+	crtc->config.dpll_hw_state.dpll = dpll;
 }
 
 static void intel_set_pipe_timings(struct intel_crtc *intel_crtc)
@@ -4727,6 +4744,27 @@
 	pipe_config->requested_mode.hdisplay = ((tmp >> 16) & 0xffff) + 1;
 }
 
+static void intel_crtc_mode_from_pipe_config(struct intel_crtc *intel_crtc,
+					     struct intel_crtc_config *pipe_config)
+{
+	struct drm_crtc *crtc = &intel_crtc->base;
+
+	crtc->mode.hdisplay = pipe_config->adjusted_mode.crtc_hdisplay;
+	crtc->mode.htotal = pipe_config->adjusted_mode.crtc_htotal;
+	crtc->mode.hsync_start = pipe_config->adjusted_mode.crtc_hsync_start;
+	crtc->mode.hsync_end = pipe_config->adjusted_mode.crtc_hsync_end;
+
+	crtc->mode.vdisplay = pipe_config->adjusted_mode.crtc_vdisplay;
+	crtc->mode.vtotal = pipe_config->adjusted_mode.crtc_vtotal;
+	crtc->mode.vsync_start = pipe_config->adjusted_mode.crtc_vsync_start;
+	crtc->mode.vsync_end = pipe_config->adjusted_mode.crtc_vsync_end;
+
+	crtc->mode.flags = pipe_config->adjusted_mode.flags;
+
+	crtc->mode.clock = pipe_config->adjusted_mode.clock;
+	crtc->mode.flags |= pipe_config->adjusted_mode.flags;
+}
+
 static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc)
 {
 	struct drm_device *dev = intel_crtc->base.dev;
@@ -4913,22 +4951,19 @@
 	uint32_t tmp;
 
 	tmp = I915_READ(PFIT_CONTROL);
+	if (!(tmp & PFIT_ENABLE))
+		return;
 
+	/* Check whether the pfit is attached to our pipe. */
 	if (INTEL_INFO(dev)->gen < 4) {
 		if (crtc->pipe != PIPE_B)
 			return;
-
-		/* gen2/3 store dither state in pfit control, needs to match */
-		pipe_config->gmch_pfit.control = tmp & PANEL_8TO6_DITHER_ENABLE;
 	} else {
 		if ((tmp & PFIT_PIPE_MASK) != (crtc->pipe << PFIT_PIPE_SHIFT))
 			return;
 	}
 
-	if (!(tmp & PFIT_ENABLE))
-		return;
-
-	pipe_config->gmch_pfit.control = I915_READ(PFIT_CONTROL);
+	pipe_config->gmch_pfit.control = tmp;
 	pipe_config->gmch_pfit.pgm_ratios = I915_READ(PFIT_PGM_RATIOS);
 	if (INTEL_INFO(dev)->gen < 5)
 		pipe_config->gmch_pfit.lvds_border_bits =
@@ -4942,7 +4977,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t tmp;
 
-	pipe_config->cpu_transcoder = crtc->pipe;
+	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
 	tmp = I915_READ(PIPECONF(crtc->pipe));
@@ -4958,6 +4993,7 @@
 		pipe_config->pixel_multiplier =
 			((tmp & DPLL_MD_UDI_MULTIPLIER_MASK)
 			 >> DPLL_MD_UDI_MULTIPLIER_SHIFT) + 1;
+		pipe_config->dpll_hw_state.dpll_md = tmp;
 	} else if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev)) {
 		tmp = I915_READ(DPLL(crtc->pipe));
 		pipe_config->pixel_multiplier =
@@ -4969,6 +5005,16 @@
 		 * function. */
 		pipe_config->pixel_multiplier = 1;
 	}
+	pipe_config->dpll_hw_state.dpll = I915_READ(DPLL(crtc->pipe));
+	if (!IS_VALLEYVIEW(dev)) {
+		pipe_config->dpll_hw_state.fp0 = I915_READ(FP0(crtc->pipe));
+		pipe_config->dpll_hw_state.fp1 = I915_READ(FP1(crtc->pipe));
+	} else {
+		/* Mask out read-only status bits. */
+		pipe_config->dpll_hw_state.dpll &= ~(DPLL_LOCK_VLV |
+						     DPLL_PORTC_READY_MASK |
+						     DPLL_PORTB_READY_MASK);
+	}
 
 	return true;
 }
@@ -5122,74 +5168,37 @@
 	BUG_ON(val != final);
 }
 
-/* Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O. */
-static void lpt_init_pch_refclk(struct drm_device *dev)
+static void lpt_reset_fdi_mphy(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct intel_encoder *encoder;
-	bool has_vga = false;
-	bool is_sdv = false;
-	u32 tmp;
+	uint32_t tmp;
 
-	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
-		switch (encoder->type) {
-		case INTEL_OUTPUT_ANALOG:
-			has_vga = true;
-			break;
-		}
-	}
+	tmp = I915_READ(SOUTH_CHICKEN2);
+	tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
+	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	if (!has_vga)
-		return;
+	if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
+			       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
+		DRM_ERROR("FDI mPHY reset assert timeout\n");
 
-	mutex_lock(&dev_priv->dpio_lock);
+	tmp = I915_READ(SOUTH_CHICKEN2);
+	tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
+	I915_WRITE(SOUTH_CHICKEN2, tmp);
 
-	/* XXX: Rip out SDV support once Haswell ships for real. */
-	if (IS_HASWELL(dev) && (dev->pci_device & 0xFF00) == 0x0C00)
-		is_sdv = true;
+	if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
+				FDI_MPHY_IOSFSB_RESET_STATUS) == 0, 100))
+		DRM_ERROR("FDI mPHY reset de-assert timeout\n");
+}
 
-	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
-	tmp &= ~SBI_SSCCTL_DISABLE;
-	tmp |= SBI_SSCCTL_PATHALT;
-	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
-
-	udelay(24);
-
-	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
-	tmp &= ~SBI_SSCCTL_PATHALT;
-	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
-
-	if (!is_sdv) {
-		tmp = I915_READ(SOUTH_CHICKEN2);
-		tmp |= FDI_MPHY_IOSFSB_RESET_CTL;
-		I915_WRITE(SOUTH_CHICKEN2, tmp);
-
-		if (wait_for_atomic_us(I915_READ(SOUTH_CHICKEN2) &
-				       FDI_MPHY_IOSFSB_RESET_STATUS, 100))
-			DRM_ERROR("FDI mPHY reset assert timeout\n");
-
-		tmp = I915_READ(SOUTH_CHICKEN2);
-		tmp &= ~FDI_MPHY_IOSFSB_RESET_CTL;
-		I915_WRITE(SOUTH_CHICKEN2, tmp);
-
-		if (wait_for_atomic_us((I915_READ(SOUTH_CHICKEN2) &
-				        FDI_MPHY_IOSFSB_RESET_STATUS) == 0,
-				       100))
-			DRM_ERROR("FDI mPHY reset de-assert timeout\n");
-	}
+/* WaMPhyProgramming:hsw */
+static void lpt_program_fdi_mphy(struct drm_i915_private *dev_priv)
+{
+	uint32_t tmp;
 
 	tmp = intel_sbi_read(dev_priv, 0x8008, SBI_MPHY);
 	tmp &= ~(0xFF << 24);
 	tmp |= (0x12 << 24);
 	intel_sbi_write(dev_priv, 0x8008, tmp, SBI_MPHY);
 
-	if (is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x800C, SBI_MPHY);
-		tmp |= 0x7FFF;
-		intel_sbi_write(dev_priv, 0x800C, tmp, SBI_MPHY);
-	}
-
 	tmp = intel_sbi_read(dev_priv, 0x2008, SBI_MPHY);
 	tmp |= (1 << 11);
 	intel_sbi_write(dev_priv, 0x2008, tmp, SBI_MPHY);
@@ -5198,24 +5207,6 @@
 	tmp |= (1 << 11);
 	intel_sbi_write(dev_priv, 0x2108, tmp, SBI_MPHY);
 
-	if (is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x2038, SBI_MPHY);
-		tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
-		intel_sbi_write(dev_priv, 0x2038, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x2138, SBI_MPHY);
-		tmp |= (0x3F << 24) | (0xF << 20) | (0xF << 16);
-		intel_sbi_write(dev_priv, 0x2138, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x203C, SBI_MPHY);
-		tmp |= (0x3F << 8);
-		intel_sbi_write(dev_priv, 0x203C, tmp, SBI_MPHY);
-
-		tmp = intel_sbi_read(dev_priv, 0x213C, SBI_MPHY);
-		tmp |= (0x3F << 8);
-		intel_sbi_write(dev_priv, 0x213C, tmp, SBI_MPHY);
-	}
-
 	tmp = intel_sbi_read(dev_priv, 0x206C, SBI_MPHY);
 	tmp |= (1 << 24) | (1 << 21) | (1 << 18);
 	intel_sbi_write(dev_priv, 0x206C, tmp, SBI_MPHY);
@@ -5224,17 +5215,15 @@
 	tmp |= (1 << 24) | (1 << 21) | (1 << 18);
 	intel_sbi_write(dev_priv, 0x216C, tmp, SBI_MPHY);
 
-	if (!is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
-		tmp &= ~(7 << 13);
-		tmp |= (5 << 13);
-		intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x2080, SBI_MPHY);
+	tmp &= ~(7 << 13);
+	tmp |= (5 << 13);
+	intel_sbi_write(dev_priv, 0x2080, tmp, SBI_MPHY);
 
-		tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
-		tmp &= ~(7 << 13);
-		tmp |= (5 << 13);
-		intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
-	}
+	tmp = intel_sbi_read(dev_priv, 0x2180, SBI_MPHY);
+	tmp &= ~(7 << 13);
+	tmp |= (5 << 13);
+	intel_sbi_write(dev_priv, 0x2180, tmp, SBI_MPHY);
 
 	tmp = intel_sbi_read(dev_priv, 0x208C, SBI_MPHY);
 	tmp &= ~0xFF;
@@ -5256,34 +5245,120 @@
 	tmp |= (0x1C << 16);
 	intel_sbi_write(dev_priv, 0x2198, tmp, SBI_MPHY);
 
-	if (!is_sdv) {
-		tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
-		tmp |= (1 << 27);
-		intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x20C4, SBI_MPHY);
+	tmp |= (1 << 27);
+	intel_sbi_write(dev_priv, 0x20C4, tmp, SBI_MPHY);
 
-		tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
-		tmp |= (1 << 27);
-		intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x21C4, SBI_MPHY);
+	tmp |= (1 << 27);
+	intel_sbi_write(dev_priv, 0x21C4, tmp, SBI_MPHY);
 
-		tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
-		tmp &= ~(0xF << 28);
-		tmp |= (4 << 28);
-		intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x20EC, SBI_MPHY);
+	tmp &= ~(0xF << 28);
+	tmp |= (4 << 28);
+	intel_sbi_write(dev_priv, 0x20EC, tmp, SBI_MPHY);
 
-		tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
-		tmp &= ~(0xF << 28);
-		tmp |= (4 << 28);
-		intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
+	tmp = intel_sbi_read(dev_priv, 0x21EC, SBI_MPHY);
+	tmp &= ~(0xF << 28);
+	tmp |= (4 << 28);
+	intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
+}
+
+/* Implements 3 different sequences from BSpec chapter "Display iCLK
+ * Programming" based on the parameters passed:
+ * - Sequence to enable CLKOUT_DP
+ * - Sequence to enable CLKOUT_DP without spread
+ * - Sequence to enable CLKOUT_DP for FDI usage and configure PCH FDI I/O
+ */
+static void lpt_enable_clkout_dp(struct drm_device *dev, bool with_spread,
+				 bool with_fdi)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t reg, tmp;
+
+	if (WARN(with_fdi && !with_spread, "FDI requires downspread\n"))
+		with_spread = true;
+	if (WARN(dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE &&
+		 with_fdi, "LP PCH doesn't have FDI\n"))
+		with_fdi = false;
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+	tmp &= ~SBI_SSCCTL_DISABLE;
+	tmp |= SBI_SSCCTL_PATHALT;
+	intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+
+	udelay(24);
+
+	if (with_spread) {
+		tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+		tmp &= ~SBI_SSCCTL_PATHALT;
+		intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+
+		if (with_fdi) {
+			lpt_reset_fdi_mphy(dev_priv);
+			lpt_program_fdi_mphy(dev_priv);
+		}
 	}
 
-	/* ULT uses SBI_GEN0, but ULT doesn't have VGA, so we don't care. */
-	tmp = intel_sbi_read(dev_priv, SBI_DBUFF0, SBI_ICLK);
-	tmp |= SBI_DBUFF0_ENABLE;
-	intel_sbi_write(dev_priv, SBI_DBUFF0, tmp, SBI_ICLK);
+	reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
+	       SBI_GEN0 : SBI_DBUFF0;
+	tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK);
+	tmp |= SBI_GEN0_CFG_BUFFENABLE_DISABLE;
+	intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
 
 	mutex_unlock(&dev_priv->dpio_lock);
 }
 
+/* Sequence to disable CLKOUT_DP */
+static void lpt_disable_clkout_dp(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t reg, tmp;
+
+	mutex_lock(&dev_priv->dpio_lock);
+
+	reg = (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) ?
+	       SBI_GEN0 : SBI_DBUFF0;
+	tmp = intel_sbi_read(dev_priv, reg, SBI_ICLK);
+	tmp &= ~SBI_GEN0_CFG_BUFFENABLE_DISABLE;
+	intel_sbi_write(dev_priv, reg, tmp, SBI_ICLK);
+
+	tmp = intel_sbi_read(dev_priv, SBI_SSCCTL, SBI_ICLK);
+	if (!(tmp & SBI_SSCCTL_DISABLE)) {
+		if (!(tmp & SBI_SSCCTL_PATHALT)) {
+			tmp |= SBI_SSCCTL_PATHALT;
+			intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+			udelay(32);
+		}
+		tmp |= SBI_SSCCTL_DISABLE;
+		intel_sbi_write(dev_priv, SBI_SSCCTL, tmp, SBI_ICLK);
+	}
+
+	mutex_unlock(&dev_priv->dpio_lock);
+}
+
+static void lpt_init_pch_refclk(struct drm_device *dev)
+{
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct intel_encoder *encoder;
+	bool has_vga = false;
+
+	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
+		switch (encoder->type) {
+		case INTEL_OUTPUT_ANALOG:
+			has_vga = true;
+			break;
+		}
+	}
+
+	if (has_vga)
+		lpt_enable_clkout_dp(dev, true, true);
+	else
+		lpt_disable_clkout_dp(dev);
+}
+
 /*
  * Initialize reference clocks when the driver loads
  */
@@ -5613,9 +5688,9 @@
 		<< PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
 
 	if (is_sdvo)
-		dpll |= DPLL_DVO_HIGH_SPEED;
+		dpll |= DPLL_SDVO_HIGH_SPEED;
 	if (intel_crtc->config.has_dp_encoder)
-		dpll |= DPLL_DVO_HIGH_SPEED;
+		dpll |= DPLL_SDVO_HIGH_SPEED;
 
 	/* compute bitmask from p1 value */
 	dpll |= (1 << (intel_crtc->config.dpll.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
@@ -5711,7 +5786,7 @@
 		else
 			intel_crtc->config.dpll_hw_state.fp1 = fp;
 
-		pll = intel_get_shared_dpll(intel_crtc, dpll, fp);
+		pll = intel_get_shared_dpll(intel_crtc);
 		if (pll == NULL) {
 			DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
 					 pipe_name(pipe));
@@ -5723,10 +5798,6 @@
 	if (intel_crtc->config.has_dp_encoder)
 		intel_dp_set_m_n(intel_crtc);
 
-	for_each_encoder_on_crtc(dev, crtc, encoder)
-		if (encoder->pre_pll_enable)
-			encoder->pre_pll_enable(encoder);
-
 	if (is_lvds && has_reduced_clock && i915_powersave)
 		intel_crtc->lowfreq_avail = true;
 	else
@@ -5735,23 +5806,6 @@
 	if (intel_crtc->config.has_pch_encoder) {
 		pll = intel_crtc_to_shared_dpll(intel_crtc);
 
-		I915_WRITE(PCH_DPLL(pll->id), dpll);
-
-		/* Wait for the clocks to stabilize. */
-		POSTING_READ(PCH_DPLL(pll->id));
-		udelay(150);
-
-		/* The pixel multiplier can only be updated once the
-		 * DPLL is enabled and the clocks are stable.
-		 *
-		 * So write it again.
-		 */
-		I915_WRITE(PCH_DPLL(pll->id), dpll);
-
-		if (has_reduced_clock)
-			I915_WRITE(PCH_FP1(pll->id), fp2);
-		else
-			I915_WRITE(PCH_FP1(pll->id), fp);
 	}
 
 	intel_set_pipe_timings(intel_crtc);
@@ -5823,7 +5877,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t tmp;
 
-	pipe_config->cpu_transcoder = crtc->pipe;
+	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
 	tmp = I915_READ(PIPECONF(crtc->pipe));
@@ -5841,12 +5895,9 @@
 
 		ironlake_get_fdi_m_n_config(crtc, pipe_config);
 
-		/* XXX: Can't properly read out the pch dpll pixel multiplier
-		 * since we don't have state tracking for pch clocks yet. */
-		pipe_config->pixel_multiplier = 1;
-
 		if (HAS_PCH_IBX(dev_priv->dev)) {
-			pipe_config->shared_dpll = crtc->pipe;
+			pipe_config->shared_dpll =
+				(enum intel_dpll_id) crtc->pipe;
 		} else {
 			tmp = I915_READ(PCH_DPLL_SEL);
 			if (tmp & TRANS_DPLLB_SEL(crtc->pipe))
@@ -5859,6 +5910,11 @@
 
 		WARN_ON(!pll->get_hw_state(dev_priv, pll,
 					   &pipe_config->dpll_hw_state));
+
+		tmp = pipe_config->dpll_hw_state.dpll;
+		pipe_config->pixel_multiplier =
+			((tmp & PLL_REF_SDVO_HDMI_MULTIPLIER_MASK)
+			 >> PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT) + 1;
 	} else {
 		pipe_config->pixel_multiplier = 1;
 	}
@@ -5870,6 +5926,305 @@
 	return true;
 }
 
+static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_ddi_plls *plls = &dev_priv->ddi_plls;
+	struct intel_crtc *crtc;
+	unsigned long irqflags;
+	uint32_t val;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
+		WARN(crtc->base.enabled, "CRTC for pipe %c enabled\n",
+		     pipe_name(crtc->pipe));
+
+	WARN(I915_READ(HSW_PWR_WELL_DRIVER), "Power well on\n");
+	WARN(plls->spll_refcount, "SPLL enabled\n");
+	WARN(plls->wrpll1_refcount, "WRPLL1 enabled\n");
+	WARN(plls->wrpll2_refcount, "WRPLL2 enabled\n");
+	WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n");
+	WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE,
+	     "CPU PWM1 enabled\n");
+	WARN(I915_READ(HSW_BLC_PWM2_CTL) & BLM_PWM_ENABLE,
+	     "CPU PWM2 enabled\n");
+	WARN(I915_READ(BLC_PWM_PCH_CTL1) & BLM_PCH_PWM_ENABLE,
+	     "PCH PWM1 enabled\n");
+	WARN(I915_READ(UTIL_PIN_CTL) & UTIL_PIN_ENABLE,
+	     "Utility pin enabled\n");
+	WARN(I915_READ(PCH_GTC_CTL) & PCH_GTC_ENABLE, "PCH GTC enabled\n");
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	val = I915_READ(DEIMR);
+	WARN((val & ~DE_PCH_EVENT_IVB) != val,
+	     "Unexpected DEIMR bits enabled: 0x%x\n", val);
+	val = I915_READ(SDEIMR);
+	WARN((val | SDE_HOTPLUG_MASK_CPT) != 0xffffffff,
+	     "Unexpected SDEIMR bits enabled: 0x%x\n", val);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
+/*
+ * This function implements pieces of two sequences from BSpec:
+ * - Sequence for display software to disable LCPLL
+ * - Sequence for display software to allow package C8+
+ * The steps implemented here are just the steps that actually touch the LCPLL
+ * register. Callers should take care of disabling all the display engine
+ * functions, doing the mode unset, fixing interrupts, etc.
+ */
+void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
+		       bool switch_to_fclk, bool allow_power_down)
+{
+	uint32_t val;
+
+	assert_can_disable_lcpll(dev_priv);
+
+	val = I915_READ(LCPLL_CTL);
+
+	if (switch_to_fclk) {
+		val |= LCPLL_CD_SOURCE_FCLK;
+		I915_WRITE(LCPLL_CTL, val);
+
+		if (wait_for_atomic_us(I915_READ(LCPLL_CTL) &
+				       LCPLL_CD_SOURCE_FCLK_DONE, 1))
+			DRM_ERROR("Switching to FCLK failed\n");
+
+		val = I915_READ(LCPLL_CTL);
+	}
+
+	val |= LCPLL_PLL_DISABLE;
+	I915_WRITE(LCPLL_CTL, val);
+	POSTING_READ(LCPLL_CTL);
+
+	if (wait_for((I915_READ(LCPLL_CTL) & LCPLL_PLL_LOCK) == 0, 1))
+		DRM_ERROR("LCPLL still locked\n");
+
+	val = I915_READ(D_COMP);
+	val |= D_COMP_COMP_DISABLE;
+	I915_WRITE(D_COMP, val);
+	POSTING_READ(D_COMP);
+	ndelay(100);
+
+	if (wait_for((I915_READ(D_COMP) & D_COMP_RCOMP_IN_PROGRESS) == 0, 1))
+		DRM_ERROR("D_COMP RCOMP still in progress\n");
+
+	if (allow_power_down) {
+		val = I915_READ(LCPLL_CTL);
+		val |= LCPLL_POWER_DOWN_ALLOW;
+		I915_WRITE(LCPLL_CTL, val);
+		POSTING_READ(LCPLL_CTL);
+	}
+}
+
+/*
+ * Fully restores LCPLL, disallowing power down and switching back to LCPLL
+ * source.
+ */
+void hsw_restore_lcpll(struct drm_i915_private *dev_priv)
+{
+	uint32_t val;
+
+	val = I915_READ(LCPLL_CTL);
+
+	if ((val & (LCPLL_PLL_LOCK | LCPLL_PLL_DISABLE | LCPLL_CD_SOURCE_FCLK |
+		    LCPLL_POWER_DOWN_ALLOW)) == LCPLL_PLL_LOCK)
+		return;
+
+	/* Make sure we're not on PC8 state before disabling PC8, otherwise
+	 * we'll hang the machine! */
+	dev_priv->uncore.funcs.force_wake_get(dev_priv);
+
+	if (val & LCPLL_POWER_DOWN_ALLOW) {
+		val &= ~LCPLL_POWER_DOWN_ALLOW;
+		I915_WRITE(LCPLL_CTL, val);
+		POSTING_READ(LCPLL_CTL);
+	}
+
+	val = I915_READ(D_COMP);
+	val |= D_COMP_COMP_FORCE;
+	val &= ~D_COMP_COMP_DISABLE;
+	I915_WRITE(D_COMP, val);
+	POSTING_READ(D_COMP);
+
+	val = I915_READ(LCPLL_CTL);
+	val &= ~LCPLL_PLL_DISABLE;
+	I915_WRITE(LCPLL_CTL, val);
+
+	if (wait_for(I915_READ(LCPLL_CTL) & LCPLL_PLL_LOCK, 5))
+		DRM_ERROR("LCPLL not locked yet\n");
+
+	if (val & LCPLL_CD_SOURCE_FCLK) {
+		val = I915_READ(LCPLL_CTL);
+		val &= ~LCPLL_CD_SOURCE_FCLK;
+		I915_WRITE(LCPLL_CTL, val);
+
+		if (wait_for_atomic_us((I915_READ(LCPLL_CTL) &
+					LCPLL_CD_SOURCE_FCLK_DONE) == 0, 1))
+			DRM_ERROR("Switching back to LCPLL failed\n");
+	}
+
+	dev_priv->uncore.funcs.force_wake_put(dev_priv);
+}
+
+void hsw_enable_pc8_work(struct work_struct *__work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(to_delayed_work(__work), struct drm_i915_private,
+			     pc8.enable_work);
+	struct drm_device *dev = dev_priv->dev;
+	uint32_t val;
+
+	if (dev_priv->pc8.enabled)
+		return;
+
+	DRM_DEBUG_KMS("Enabling package C8+\n");
+
+	dev_priv->pc8.enabled = true;
+
+	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
+		val = I915_READ(SOUTH_DSPCLK_GATE_D);
+		val &= ~PCH_LP_PARTITION_LEVEL_DISABLE;
+		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
+	}
+
+	lpt_disable_clkout_dp(dev);
+	hsw_pc8_disable_interrupts(dev);
+	hsw_disable_lcpll(dev_priv, true, true);
+}
+
+static void __hsw_enable_package_c8(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!mutex_is_locked(&dev_priv->pc8.lock));
+	WARN(dev_priv->pc8.disable_count < 1,
+	     "pc8.disable_count: %d\n", dev_priv->pc8.disable_count);
+
+	dev_priv->pc8.disable_count--;
+	if (dev_priv->pc8.disable_count != 0)
+		return;
+
+	schedule_delayed_work(&dev_priv->pc8.enable_work,
+			      msecs_to_jiffies(i915_pc8_timeout));
+}
+
+static void __hsw_disable_package_c8(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	uint32_t val;
+
+	WARN_ON(!mutex_is_locked(&dev_priv->pc8.lock));
+	WARN(dev_priv->pc8.disable_count < 0,
+	     "pc8.disable_count: %d\n", dev_priv->pc8.disable_count);
+
+	dev_priv->pc8.disable_count++;
+	if (dev_priv->pc8.disable_count != 1)
+		return;
+
+	cancel_delayed_work_sync(&dev_priv->pc8.enable_work);
+	if (!dev_priv->pc8.enabled)
+		return;
+
+	DRM_DEBUG_KMS("Disabling package C8+\n");
+
+	hsw_restore_lcpll(dev_priv);
+	hsw_pc8_restore_interrupts(dev);
+	lpt_init_pch_refclk(dev);
+
+	if (dev_priv->pch_id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) {
+		val = I915_READ(SOUTH_DSPCLK_GATE_D);
+		val |= PCH_LP_PARTITION_LEVEL_DISABLE;
+		I915_WRITE(SOUTH_DSPCLK_GATE_D, val);
+	}
+
+	intel_prepare_ddi(dev);
+	i915_gem_init_swizzling(dev);
+	mutex_lock(&dev_priv->rps.hw_lock);
+	gen6_update_ring_freq(dev);
+	mutex_unlock(&dev_priv->rps.hw_lock);
+	dev_priv->pc8.enabled = false;
+}
+
+void hsw_enable_package_c8(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->pc8.lock);
+	__hsw_enable_package_c8(dev_priv);
+	mutex_unlock(&dev_priv->pc8.lock);
+}
+
+void hsw_disable_package_c8(struct drm_i915_private *dev_priv)
+{
+	mutex_lock(&dev_priv->pc8.lock);
+	__hsw_disable_package_c8(dev_priv);
+	mutex_unlock(&dev_priv->pc8.lock);
+}
+
+static bool hsw_can_enable_package_c8(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct intel_crtc *crtc;
+	uint32_t val;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head)
+		if (crtc->base.enabled)
+			return false;
+
+	/* This case is still possible since we have the i915.disable_power_well
+	 * parameter and also the KVMr or something else might be requesting the
+	 * power well. */
+	val = I915_READ(HSW_PWR_WELL_DRIVER);
+	if (val != 0) {
+		DRM_DEBUG_KMS("Not enabling PC8: power well on\n");
+		return false;
+	}
+
+	return true;
+}
+
+/* Since we're called from modeset_global_resources there's no way to
+ * symmetrically increase and decrease the refcount, so we use
+ * dev_priv->pc8.requirements_met to track whether we already have the refcount
+ * or not.
+ */
+static void hsw_update_package_c8(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool allow;
+
+	if (!i915_enable_pc8)
+		return;
+
+	mutex_lock(&dev_priv->pc8.lock);
+
+	allow = hsw_can_enable_package_c8(dev_priv);
+
+	if (allow == dev_priv->pc8.requirements_met)
+		goto done;
+
+	dev_priv->pc8.requirements_met = allow;
+
+	if (allow)
+		__hsw_enable_package_c8(dev_priv);
+	else
+		__hsw_disable_package_c8(dev_priv);
+
+done:
+	mutex_unlock(&dev_priv->pc8.lock);
+}
+
+static void hsw_package_c8_gpu_idle(struct drm_i915_private *dev_priv)
+{
+	if (!dev_priv->pc8.gpu_idle) {
+		dev_priv->pc8.gpu_idle = true;
+		hsw_enable_package_c8(dev_priv);
+	}
+}
+
+static void hsw_package_c8_gpu_busy(struct drm_i915_private *dev_priv)
+{
+	if (dev_priv->pc8.gpu_idle) {
+		dev_priv->pc8.gpu_idle = false;
+		hsw_disable_package_c8(dev_priv);
+	}
+}
+
 static void haswell_modeset_global_resources(struct drm_device *dev)
 {
 	bool enable = false;
@@ -5885,6 +6240,8 @@
 	}
 
 	intel_set_power_well(dev, enable);
+
+	hsw_update_package_c8(dev);
 }
 
 static int haswell_crtc_mode_set(struct drm_crtc *crtc,
@@ -5938,7 +6295,7 @@
 	enum intel_display_power_domain pfit_domain;
 	uint32_t tmp;
 
-	pipe_config->cpu_transcoder = crtc->pipe;
+	pipe_config->cpu_transcoder = (enum transcoder) crtc->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
 	tmp = I915_READ(TRANS_DDI_FUNC_CTL(TRANSCODER_EDP));
@@ -6008,11 +6365,8 @@
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_encoder_helper_funcs *encoder_funcs;
 	struct intel_encoder *encoder;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct drm_display_mode *adjusted_mode =
-		&intel_crtc->config.adjusted_mode;
 	struct drm_display_mode *mode = &intel_crtc->config.requested_mode;
 	int pipe = intel_crtc->pipe;
 	int ret;
@@ -6031,12 +6385,7 @@
 			encoder->base.base.id,
 			drm_get_encoder_name(&encoder->base),
 			mode->base.id, mode->name);
-		if (encoder->mode_set) {
-			encoder->mode_set(encoder);
-		} else {
-			encoder_funcs = encoder->base.helper_private;
-			encoder_funcs->mode_set(&encoder->base, mode, adjusted_mode);
-		}
+		encoder->mode_set(encoder);
 	}
 
 	return 0;
@@ -6551,7 +6900,7 @@
 			goto fail_unpin;
 		}
 
-		addr = obj->gtt_offset;
+		addr = i915_gem_obj_ggtt_offset(obj);
 	} else {
 		int align = IS_I830(dev) ? 16 * 1024 : 256;
 		ret = i915_gem_attach_phys_object(dev, obj,
@@ -6573,7 +6922,7 @@
 			if (intel_crtc->cursor_bo != obj)
 				i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo);
 		} else
-			i915_gem_object_unpin(intel_crtc->cursor_bo);
+			i915_gem_object_unpin_from_display_plane(intel_crtc->cursor_bo);
 		drm_gem_object_unreference(&intel_crtc->cursor_bo->base);
 	}
 
@@ -6588,7 +6937,7 @@
 
 	return 0;
 fail_unpin:
-	i915_gem_object_unpin(obj);
+	i915_gem_object_unpin_from_display_plane(obj);
 fail_locked:
 	mutex_unlock(&dev->struct_mutex);
 fail:
@@ -6878,11 +7227,12 @@
 }
 
 /* Returns the clock of the currently programmed mode of the given pipe. */
-static int intel_crtc_clock_get(struct drm_device *dev, struct drm_crtc *crtc)
+static void i9xx_crtc_clock_get(struct intel_crtc *crtc,
+				struct intel_crtc_config *pipe_config)
 {
+	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
+	int pipe = pipe_config->cpu_transcoder;
 	u32 dpll = I915_READ(DPLL(pipe));
 	u32 fp;
 	intel_clock_t clock;
@@ -6921,7 +7271,8 @@
 		default:
 			DRM_DEBUG_KMS("Unknown DPLL mode %08x in programmed "
 				  "mode\n", (int)(dpll & DPLL_MODE_MASK));
-			return 0;
+			pipe_config->adjusted_mode.clock = 0;
+			return;
 		}
 
 		if (IS_PINEVIEW(dev))
@@ -6958,12 +7309,55 @@
 		}
 	}
 
-	/* XXX: It would be nice to validate the clocks, but we can't reuse
-	 * i830PllIsValid() because it relies on the xf86_config connector
-	 * configuration being accurate, which it isn't necessarily.
+	pipe_config->adjusted_mode.clock = clock.dot *
+		pipe_config->pixel_multiplier;
+}
+
+static void ironlake_crtc_clock_get(struct intel_crtc *crtc,
+				    struct intel_crtc_config *pipe_config)
+{
+	struct drm_device *dev = crtc->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	enum transcoder cpu_transcoder = pipe_config->cpu_transcoder;
+	int link_freq, repeat;
+	u64 clock;
+	u32 link_m, link_n;
+
+	repeat = pipe_config->pixel_multiplier;
+
+	/*
+	 * The calculation for the data clock is:
+	 * pixel_clock = ((m/n)*(link_clock * nr_lanes * repeat))/bpp
+	 * But we want to avoid losing precison if possible, so:
+	 * pixel_clock = ((m * link_clock * nr_lanes * repeat)/(n*bpp))
+	 *
+	 * and the link clock is simpler:
+	 * link_clock = (m * link_clock * repeat) / n
 	 */
 
-	return clock.dot;
+	/*
+	 * We need to get the FDI or DP link clock here to derive
+	 * the M/N dividers.
+	 *
+	 * For FDI, we read it from the BIOS or use a fixed 2.7GHz.
+	 * For DP, it's either 1.62GHz or 2.7GHz.
+	 * We do our calculations in 10*MHz since we don't need much precison.
+	 */
+	if (pipe_config->has_pch_encoder)
+		link_freq = intel_fdi_link_freq(dev) * 10000;
+	else
+		link_freq = pipe_config->port_clock;
+
+	link_m = I915_READ(PIPE_LINK_M1(cpu_transcoder));
+	link_n = I915_READ(PIPE_LINK_N1(cpu_transcoder));
+
+	if (!link_m || !link_n)
+		return;
+
+	clock = ((u64)link_m * (u64)link_freq * (u64)repeat);
+	do_div(clock, link_n);
+
+	pipe_config->adjusted_mode.clock = clock;
 }
 
 /** Returns the currently programmed mode of the given pipe. */
@@ -6974,6 +7368,7 @@
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder;
 	struct drm_display_mode *mode;
+	struct intel_crtc_config pipe_config;
 	int htot = I915_READ(HTOTAL(cpu_transcoder));
 	int hsync = I915_READ(HSYNC(cpu_transcoder));
 	int vtot = I915_READ(VTOTAL(cpu_transcoder));
@@ -6983,7 +7378,18 @@
 	if (!mode)
 		return NULL;
 
-	mode->clock = intel_crtc_clock_get(dev, crtc);
+	/*
+	 * Construct a pipe_config sufficient for getting the clock info
+	 * back out of crtc_clock_get.
+	 *
+	 * Note, if LVDS ever uses a non-1 pixel multiplier, we'll need
+	 * to use a real value here instead.
+	 */
+	pipe_config.cpu_transcoder = (enum transcoder) intel_crtc->pipe;
+	pipe_config.pixel_multiplier = 1;
+	i9xx_crtc_clock_get(intel_crtc, &pipe_config);
+
+	mode->clock = pipe_config.adjusted_mode.clock;
 	mode->hdisplay = (htot & 0xffff) + 1;
 	mode->htotal = ((htot & 0xffff0000) >> 16) + 1;
 	mode->hsync_start = (hsync & 0xffff) + 1;
@@ -7067,13 +7473,19 @@
 
 void intel_mark_busy(struct drm_device *dev)
 {
-	i915_update_gfx_val(dev->dev_private);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	hsw_package_c8_gpu_busy(dev_priv);
+	i915_update_gfx_val(dev_priv);
 }
 
 void intel_mark_idle(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
 
+	hsw_package_c8_gpu_idle(dev_priv);
+
 	if (!i915_powersave)
 		return;
 
@@ -7238,7 +7650,8 @@
 static int intel_gen2_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 uint32_t flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -7266,7 +7679,7 @@
 	intel_ring_emit(ring, MI_DISPLAY_FLIP |
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0]);
-	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, 0); /* aux display base address, unused */
 
 	intel_mark_page_flip_active(intel_crtc);
@@ -7282,7 +7695,8 @@
 static int intel_gen3_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 uint32_t flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -7307,7 +7721,7 @@
 	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 |
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0]);
-	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, MI_NOOP);
 
 	intel_mark_page_flip_active(intel_crtc);
@@ -7323,7 +7737,8 @@
 static int intel_gen4_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 uint32_t flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -7347,7 +7762,7 @@
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0]);
 	intel_ring_emit(ring,
-			(obj->gtt_offset + intel_crtc->dspaddr_offset) |
+			(i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset) |
 			obj->tiling_mode);
 
 	/* XXX Enabling the panel-fitter across page-flip is so far
@@ -7371,7 +7786,8 @@
 static int intel_gen6_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 uint32_t flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -7390,7 +7806,7 @@
 	intel_ring_emit(ring, MI_DISPLAY_FLIP |
 			MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 	intel_ring_emit(ring, fb->pitches[0] | obj->tiling_mode);
-	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 
 	/* Contrary to the suggestions in the documentation,
 	 * "Enable Panel Fitter" does not seem to be required when page
@@ -7421,7 +7837,8 @@
 static int intel_gen7_queue_flip(struct drm_device *dev,
 				 struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_i915_gem_object *obj)
+				 struct drm_i915_gem_object *obj,
+				 uint32_t flags)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -7455,7 +7872,7 @@
 
 	intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit);
 	intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode));
-	intel_ring_emit(ring, obj->gtt_offset + intel_crtc->dspaddr_offset);
+	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset);
 	intel_ring_emit(ring, (MI_NOOP));
 
 	intel_mark_page_flip_active(intel_crtc);
@@ -7471,14 +7888,16 @@
 static int intel_default_queue_flip(struct drm_device *dev,
 				    struct drm_crtc *crtc,
 				    struct drm_framebuffer *fb,
-				    struct drm_i915_gem_object *obj)
+				    struct drm_i915_gem_object *obj,
+				    uint32_t flags)
 {
 	return -ENODEV;
 }
 
 static int intel_crtc_page_flip(struct drm_crtc *crtc,
 				struct drm_framebuffer *fb,
-				struct drm_pending_vblank_event *event)
+				struct drm_pending_vblank_event *event,
+				uint32_t page_flip_flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -7548,7 +7967,7 @@
 	atomic_inc(&intel_crtc->unpin_work_count);
 	intel_crtc->reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
 
-	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj);
+	ret = dev_priv->display.queue_flip(dev, crtc, fb, obj, page_flip_flags);
 	if (ret)
 		goto cleanup_pending;
 
@@ -7792,7 +8211,6 @@
 			  struct drm_display_mode *mode)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_encoder_helper_funcs *encoder_funcs;
 	struct intel_encoder *encoder;
 	struct intel_crtc_config *pipe_config;
 	int plane_bpp, ret = -EINVAL;
@@ -7809,9 +8227,23 @@
 
 	drm_mode_copy(&pipe_config->adjusted_mode, mode);
 	drm_mode_copy(&pipe_config->requested_mode, mode);
-	pipe_config->cpu_transcoder = to_intel_crtc(crtc)->pipe;
+	pipe_config->cpu_transcoder =
+		(enum transcoder) to_intel_crtc(crtc)->pipe;
 	pipe_config->shared_dpll = DPLL_ID_PRIVATE;
 
+	/*
+	 * Sanitize sync polarity flags based on requested ones. If neither
+	 * positive or negative polarity is requested, treat this as meaning
+	 * negative polarity.
+	 */
+	if (!(pipe_config->adjusted_mode.flags &
+	      (DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NHSYNC)))
+		pipe_config->adjusted_mode.flags |= DRM_MODE_FLAG_NHSYNC;
+
+	if (!(pipe_config->adjusted_mode.flags &
+	      (DRM_MODE_FLAG_PVSYNC | DRM_MODE_FLAG_NVSYNC)))
+		pipe_config->adjusted_mode.flags |= DRM_MODE_FLAG_NVSYNC;
+
 	/* Compute a starting value for pipe_config->pipe_bpp taking the source
 	 * plane pixel format and any sink constraints into account. Returns the
 	 * source plane bpp so that dithering can be selected on mismatches
@@ -7826,6 +8258,9 @@
 	pipe_config->port_clock = 0;
 	pipe_config->pixel_multiplier = 1;
 
+	/* Fill in default crtc timings, allow encoders to overwrite them. */
+	drm_mode_set_crtcinfo(&pipe_config->adjusted_mode, 0);
+
 	/* Pass our mode to the connectors and the CRTC to give them a chance to
 	 * adjust it according to limitations or connector properties, and also
 	 * a chance to reject the mode entirely.
@@ -7836,20 +8271,8 @@
 		if (&encoder->new_crtc->base != crtc)
 			continue;
 
-		if (encoder->compute_config) {
-			if (!(encoder->compute_config(encoder, pipe_config))) {
-				DRM_DEBUG_KMS("Encoder config failure\n");
-				goto fail;
-			}
-
-			continue;
-		}
-
-		encoder_funcs = encoder->base.helper_private;
-		if (!(encoder_funcs->mode_fixup(&encoder->base,
-						&pipe_config->requested_mode,
-						&pipe_config->adjusted_mode))) {
-			DRM_DEBUG_KMS("Encoder fixup failed\n");
+		if (!(encoder->compute_config(encoder, pipe_config))) {
+			DRM_DEBUG_KMS("Encoder config failure\n");
 			goto fail;
 		}
 	}
@@ -8044,6 +8467,28 @@
 
 }
 
+static bool intel_fuzzy_clock_check(struct intel_crtc_config *cur,
+				    struct intel_crtc_config *new)
+{
+	int clock1, clock2, diff;
+
+	clock1 = cur->adjusted_mode.clock;
+	clock2 = new->adjusted_mode.clock;
+
+	if (clock1 == clock2)
+		return true;
+
+	if (!clock1 || !clock2)
+		return false;
+
+	diff = abs(clock1 - clock2);
+
+	if (((((diff + clock1 + clock2) * 100)) / (clock1 + clock2)) < 105)
+		return true;
+
+	return false;
+}
+
 #define for_each_intel_crtc_masked(dev, mask, intel_crtc) \
 	list_for_each_entry((intel_crtc), \
 			    &(dev)->mode_config.crtc_list, \
@@ -8075,7 +8520,7 @@
 
 #define PIPE_CONF_CHECK_FLAGS(name, mask)	\
 	if ((current_config->name ^ pipe_config->name) & (mask)) { \
-		DRM_ERROR("mismatch in " #name " " \
+		DRM_ERROR("mismatch in " #name "(" #mask ") "	   \
 			  "(expected %i, found %i)\n", \
 			  current_config->name & (mask), \
 			  pipe_config->name & (mask)); \
@@ -8109,8 +8554,7 @@
 	PIPE_CONF_CHECK_I(adjusted_mode.crtc_vsync_start);
 	PIPE_CONF_CHECK_I(adjusted_mode.crtc_vsync_end);
 
-	if (!HAS_PCH_SPLIT(dev))
-		PIPE_CONF_CHECK_I(pixel_multiplier);
+	PIPE_CONF_CHECK_I(pixel_multiplier);
 
 	PIPE_CONF_CHECK_FLAGS(adjusted_mode.flags,
 			      DRM_MODE_FLAG_INTERLACE);
@@ -8141,6 +8585,7 @@
 
 	PIPE_CONF_CHECK_I(shared_dpll);
 	PIPE_CONF_CHECK_X(dpll_hw_state.dpll);
+	PIPE_CONF_CHECK_X(dpll_hw_state.dpll_md);
 	PIPE_CONF_CHECK_X(dpll_hw_state.fp0);
 	PIPE_CONF_CHECK_X(dpll_hw_state.fp1);
 
@@ -8149,6 +8594,15 @@
 #undef PIPE_CONF_CHECK_FLAGS
 #undef PIPE_CONF_QUIRK
 
+	if (!IS_HASWELL(dev)) {
+		if (!intel_fuzzy_clock_check(current_config, pipe_config)) {
+			DRM_ERROR("mismatch in clock (expected %d, found %d)\n",
+				  current_config->adjusted_mode.clock,
+				  pipe_config->adjusted_mode.clock);
+			return false;
+		}
+	}
+
 	return true;
 }
 
@@ -8272,12 +8726,17 @@
 
 		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
 				    base.head) {
+			enum pipe pipe;
 			if (encoder->base.crtc != &crtc->base)
 				continue;
-			if (encoder->get_config)
+			if (encoder->get_config &&
+			    encoder->get_hw_state(encoder, &pipe))
 				encoder->get_config(encoder, &pipe_config);
 		}
 
+		if (dev_priv->display.get_clock)
+			dev_priv->display.get_clock(crtc, &pipe_config);
+
 		WARN(crtc->active != active,
 		     "crtc active state doesn't match with hw state "
 		     "(expected %i, found %i)\n", crtc->active, active);
@@ -8317,6 +8776,8 @@
 		     pll->active, pll->refcount);
 		WARN(pll->active && !pll->on,
 		     "pll in active use but not on in sw tracking\n");
+		WARN(pll->on && !pll->active,
+		     "pll in on but not on in use in sw tracking\n");
 		WARN(pll->on != active,
 		     "pll on state mismatch (expected %i, found %i)\n",
 		     pll->on, active);
@@ -8453,9 +8914,9 @@
 	return ret;
 }
 
-int intel_set_mode(struct drm_crtc *crtc,
-		     struct drm_display_mode *mode,
-		     int x, int y, struct drm_framebuffer *fb)
+static int intel_set_mode(struct drm_crtc *crtc,
+			  struct drm_display_mode *mode,
+			  int x, int y, struct drm_framebuffer *fb)
 {
 	int ret;
 
@@ -8541,15 +9002,20 @@
 }
 
 static bool
-is_crtc_connector_off(struct drm_crtc *crtc, struct drm_connector *connectors,
-		      int num_connectors)
+is_crtc_connector_off(struct drm_mode_set *set)
 {
 	int i;
 
-	for (i = 0; i < num_connectors; i++)
-		if (connectors[i].encoder &&
-		    connectors[i].encoder->crtc == crtc &&
-		    connectors[i].dpms != DRM_MODE_DPMS_ON)
+	if (set->num_connectors == 0)
+		return false;
+
+	if (WARN_ON(set->connectors == NULL))
+		return false;
+
+	for (i = 0; i < set->num_connectors; i++)
+		if (set->connectors[i]->encoder &&
+		    set->connectors[i]->encoder->crtc == set->crtc &&
+		    set->connectors[i]->dpms != DRM_MODE_DPMS_ON)
 			return true;
 
 	return false;
@@ -8562,15 +9028,21 @@
 
 	/* We should be able to check here if the fb has the same properties
 	 * and then just flip_or_move it */
-	if (set->connectors != NULL &&
-	    is_crtc_connector_off(set->crtc, *set->connectors,
-				  set->num_connectors)) {
-			config->mode_changed = true;
+	if (is_crtc_connector_off(set)) {
+		config->mode_changed = true;
 	} else if (set->crtc->fb != set->fb) {
 		/* If we have no fb then treat it as a full mode set */
 		if (set->crtc->fb == NULL) {
-			DRM_DEBUG_KMS("crtc has no fb, full mode set\n");
-			config->mode_changed = true;
+			struct intel_crtc *intel_crtc =
+				to_intel_crtc(set->crtc);
+
+			if (intel_crtc->active && i915_fastboot) {
+				DRM_DEBUG_KMS("crtc has no fb, will flip\n");
+				config->fb_changed = true;
+			} else {
+				DRM_DEBUG_KMS("inactive crtc, full mode set\n");
+				config->mode_changed = true;
+			}
 		} else if (set->fb == NULL) {
 			config->mode_changed = true;
 		} else if (set->fb->pixel_format !=
@@ -8590,6 +9062,9 @@
 		drm_mode_debug_printmodeline(set->mode);
 		config->mode_changed = true;
 	}
+
+	DRM_DEBUG_KMS("computed changes for [CRTC:%d], mode_changed=%d, fb_changed=%d\n",
+			set->crtc->base.id, config->mode_changed, config->fb_changed);
 }
 
 static int
@@ -8600,14 +9075,13 @@
 	struct drm_crtc *new_crtc;
 	struct intel_connector *connector;
 	struct intel_encoder *encoder;
-	int count, ro;
+	int ro;
 
 	/* The upper layers ensure that we either disable a crtc or have a list
 	 * of connectors. For paranoia, double-check this. */
 	WARN_ON(!set->fb && (set->num_connectors != 0));
 	WARN_ON(set->fb && (set->num_connectors == 0));
 
-	count = 0;
 	list_for_each_entry(connector, &dev->mode_config.connector_list,
 			    base.head) {
 		/* Otherwise traverse passed in connector list and get encoders
@@ -8641,7 +9115,6 @@
 	/* connector->new_encoder is now updated for all connectors. */
 
 	/* Update crtc of enabled connectors. */
-	count = 0;
 	list_for_each_entry(connector, &dev->mode_config.connector_list,
 			    base.head) {
 		if (!connector->new_encoder)
@@ -8800,19 +9273,32 @@
 	return val & DPLL_VCO_ENABLE;
 }
 
+static void ibx_pch_dpll_mode_set(struct drm_i915_private *dev_priv,
+				  struct intel_shared_dpll *pll)
+{
+	I915_WRITE(PCH_FP0(pll->id), pll->hw_state.fp0);
+	I915_WRITE(PCH_FP1(pll->id), pll->hw_state.fp1);
+}
+
 static void ibx_pch_dpll_enable(struct drm_i915_private *dev_priv,
 				struct intel_shared_dpll *pll)
 {
-	uint32_t reg, val;
-
 	/* PCH refclock must be enabled first */
 	assert_pch_refclk_enabled(dev_priv);
 
-	reg = PCH_DPLL(pll->id);
-	val = I915_READ(reg);
-	val |= DPLL_VCO_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	I915_WRITE(PCH_DPLL(pll->id), pll->hw_state.dpll);
+
+	/* Wait for the clocks to stabilize. */
+	POSTING_READ(PCH_DPLL(pll->id));
+	udelay(150);
+
+	/* The pixel multiplier can only be updated once the
+	 * DPLL is enabled and the clocks are stable.
+	 *
+	 * So write it again.
+	 */
+	I915_WRITE(PCH_DPLL(pll->id), pll->hw_state.dpll);
+	POSTING_READ(PCH_DPLL(pll->id));
 	udelay(200);
 }
 
@@ -8821,7 +9307,6 @@
 {
 	struct drm_device *dev = dev_priv->dev;
 	struct intel_crtc *crtc;
-	uint32_t reg, val;
 
 	/* Make sure no transcoder isn't still depending on us. */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, base.head) {
@@ -8829,11 +9314,8 @@
 			assert_pch_transcoder_disabled(dev_priv, crtc->pipe);
 	}
 
-	reg = PCH_DPLL(pll->id);
-	val = I915_READ(reg);
-	val &= ~DPLL_VCO_ENABLE;
-	I915_WRITE(reg, val);
-	POSTING_READ(reg);
+	I915_WRITE(PCH_DPLL(pll->id), 0);
+	POSTING_READ(PCH_DPLL(pll->id));
 	udelay(200);
 }
 
@@ -8852,6 +9334,7 @@
 	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
 		dev_priv->shared_dplls[i].id = i;
 		dev_priv->shared_dplls[i].name = ibx_pch_dpll_names[i];
+		dev_priv->shared_dplls[i].mode_set = ibx_pch_dpll_mode_set;
 		dev_priv->shared_dplls[i].enable = ibx_pch_dpll_enable;
 		dev_priv->shared_dplls[i].disable = ibx_pch_dpll_disable;
 		dev_priv->shared_dplls[i].get_hw_state =
@@ -9031,8 +9514,13 @@
 			intel_dp_init(dev, PCH_DP_D, PORT_D);
 	} else if (IS_VALLEYVIEW(dev)) {
 		/* Check for built-in panel first. Shares lanes with HDMI on SDVOC */
-		if (I915_READ(VLV_DISPLAY_BASE + DP_C) & DP_DETECTED)
-			intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C, PORT_C);
+		if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIC) & SDVO_DETECTED) {
+			intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIC,
+					PORT_C);
+			if (I915_READ(VLV_DISPLAY_BASE + DP_C) & DP_DETECTED)
+				intel_dp_init(dev, VLV_DISPLAY_BASE + DP_C,
+					      PORT_C);
+		}
 
 		if (I915_READ(VLV_DISPLAY_BASE + GEN4_HDMIB) & SDVO_DETECTED) {
 			intel_hdmi_init(dev, VLV_DISPLAY_BASE + GEN4_HDMIB,
@@ -9092,13 +9580,17 @@
 	drm_helper_move_panel_connectors_to_head(dev);
 }
 
+void intel_framebuffer_fini(struct intel_framebuffer *fb)
+{
+	drm_framebuffer_cleanup(&fb->base);
+	drm_gem_object_unreference_unlocked(&fb->obj->base);
+}
+
 static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb);
 
-	drm_framebuffer_cleanup(fb);
-	drm_gem_object_unreference_unlocked(&intel_fb->obj->base);
-
+	intel_framebuffer_fini(intel_fb);
 	kfree(intel_fb);
 }
 
@@ -9268,6 +9760,7 @@
 		dev_priv->display.update_plane = ironlake_update_plane;
 	} else if (HAS_PCH_SPLIT(dev)) {
 		dev_priv->display.get_pipe_config = ironlake_get_pipe_config;
+		dev_priv->display.get_clock = ironlake_crtc_clock_get;
 		dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set;
 		dev_priv->display.crtc_enable = ironlake_crtc_enable;
 		dev_priv->display.crtc_disable = ironlake_crtc_disable;
@@ -9275,6 +9768,7 @@
 		dev_priv->display.update_plane = ironlake_update_plane;
 	} else if (IS_VALLEYVIEW(dev)) {
 		dev_priv->display.get_pipe_config = i9xx_get_pipe_config;
+		dev_priv->display.get_clock = i9xx_crtc_clock_get;
 		dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set;
 		dev_priv->display.crtc_enable = valleyview_crtc_enable;
 		dev_priv->display.crtc_disable = i9xx_crtc_disable;
@@ -9282,6 +9776,7 @@
 		dev_priv->display.update_plane = i9xx_update_plane;
 	} else {
 		dev_priv->display.get_pipe_config = i9xx_get_pipe_config;
+		dev_priv->display.get_clock = i9xx_crtc_clock_get;
 		dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set;
 		dev_priv->display.crtc_enable = i9xx_crtc_enable;
 		dev_priv->display.crtc_disable = i9xx_crtc_disable;
@@ -9299,9 +9794,12 @@
 	else if (IS_I915G(dev))
 		dev_priv->display.get_display_clock_speed =
 			i915_get_display_clock_speed;
-	else if (IS_I945GM(dev) || IS_845G(dev) || IS_PINEVIEW_M(dev))
+	else if (IS_I945GM(dev) || IS_845G(dev))
 		dev_priv->display.get_display_clock_speed =
 			i9xx_misc_get_display_clock_speed;
+	else if (IS_PINEVIEW(dev))
+		dev_priv->display.get_display_clock_speed =
+			pnv_get_display_clock_speed;
 	else if (IS_I915GM(dev))
 		dev_priv->display.get_display_clock_speed =
 			i915gm_get_display_clock_speed;
@@ -9398,6 +9896,17 @@
 	DRM_INFO("applying inverted panel brightness quirk\n");
 }
 
+/*
+ * Some machines (Dell XPS13) suffer broken backlight controls if
+ * BLM_PCH_PWM_ENABLE is set.
+ */
+static void quirk_no_pcm_pwm_enable(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	dev_priv->quirks |= QUIRK_NO_PCH_PWM_ENABLE;
+	DRM_INFO("applying no-PCH_PWM_ENABLE quirk\n");
+}
+
 struct intel_quirk {
 	int device;
 	int subsystem_vendor;
@@ -9467,6 +9976,11 @@
 
 	/* Acer Aspire 4736Z */
 	{ 0x2a42, 0x1025, 0x0260, quirk_invert_brightness },
+
+	/* Dell XPS13 HD Sandy Bridge */
+	{ 0x0116, 0x1028, 0x052e, quirk_no_pcm_pwm_enable },
+	/* Dell XPS13 HD and XPS13 FHD Ivy Bridge */
+	{ 0x0166, 0x1028, 0x058b, quirk_no_pcm_pwm_enable },
 };
 
 static void intel_init_quirks(struct drm_device *dev)
@@ -9566,7 +10080,7 @@
 		      INTEL_INFO(dev)->num_pipes,
 		      INTEL_INFO(dev)->num_pipes > 1 ? "s" : "");
 
-	for (i = 0; i < INTEL_INFO(dev)->num_pipes; i++) {
+	for_each_pipe(i) {
 		intel_crtc_init(dev, i);
 		for (j = 0; j < dev_priv->num_plane; j++) {
 			ret = intel_plane_init(dev, i, j);
@@ -9772,6 +10286,17 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 vga_reg = i915_vgacntrl_reg(dev);
 
+	/* This function can be called both from intel_modeset_setup_hw_state or
+	 * at a very early point in our resume sequence, where the power well
+	 * structures are not yet restored. Since this function is at a very
+	 * paranoid "someone might have enabled VGA while we were not looking"
+	 * level, just check if the power well is enabled instead of trying to
+	 * follow the "don't touch the power well if we don't need it" policy
+	 * the rest of the driver uses. */
+	if (HAS_POWER_WELL(dev) &&
+	    (I915_READ(HSW_PWR_WELL_DRIVER) & HSW_PWR_WELL_STATE_ENABLED) == 0)
+		return;
+
 	if (I915_READ(vga_reg) != VGA_DISP_DISABLE) {
 		DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n");
 		i915_disable_vga(dev);
@@ -9817,8 +10342,8 @@
 		}
 		pll->refcount = pll->active;
 
-		DRM_DEBUG_KMS("%s hw state readout: refcount %i\n",
-			      pll->name, pll->refcount);
+		DRM_DEBUG_KMS("%s hw state readout: refcount %i, on %i\n",
+			      pll->name, pll->refcount, pll->on);
 	}
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
@@ -9842,6 +10367,15 @@
 			      pipe);
 	}
 
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
+			    base.head) {
+		if (!crtc->active)
+			continue;
+		if (dev_priv->display.get_clock)
+			dev_priv->display.get_clock(crtc,
+						    &crtc->config);
+	}
+
 	list_for_each_entry(connector, &dev->mode_config.connector_list,
 			    base.head) {
 		if (connector->get_hw_state(connector)) {
@@ -9869,9 +10403,26 @@
 	struct drm_plane *plane;
 	struct intel_crtc *crtc;
 	struct intel_encoder *encoder;
+	int i;
 
 	intel_modeset_readout_hw_state(dev);
 
+	/*
+	 * Now that we have the config, copy it to each CRTC struct
+	 * Note that this could go away if we move to using crtc_config
+	 * checking everywhere.
+	 */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list,
+			    base.head) {
+		if (crtc->active && i915_fastboot) {
+			intel_crtc_mode_from_pipe_config(crtc, &crtc->config);
+
+			DRM_DEBUG_KMS("[CRTC:%d] found active mode: ",
+				      crtc->base.base.id);
+			drm_mode_debug_printmodeline(&crtc->base.mode);
+		}
+	}
+
 	/* HW state is read out, now we need to sanitize this mess. */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
 			    base.head) {
@@ -9884,6 +10435,18 @@
 		intel_dump_pipe_config(crtc, &crtc->config, "[setup_hw_state]");
 	}
 
+	for (i = 0; i < dev_priv->num_shared_dpll; i++) {
+		struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i];
+
+		if (!pll->on || pll->active)
+			continue;
+
+		DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name);
+
+		pll->disable(dev_priv, pll);
+		pll->on = false;
+	}
+
 	if (force_restore) {
 		/*
 		 * We need to use raw interfaces for restoring state to avoid
@@ -9922,7 +10485,6 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
-	struct intel_crtc *intel_crtc;
 
 	/*
 	 * Interrupts and polling as the first thing to avoid creating havoc.
@@ -9946,7 +10508,6 @@
 		if (!crtc->fb)
 			continue;
 
-		intel_crtc = to_intel_crtc(crtc);
 		intel_increase_pllclock(crtc);
 	}
 
@@ -10002,13 +10563,12 @@
 	return 0;
 }
 
-#ifdef CONFIG_DEBUG_FS
-#include <linux/seq_file.h>
-
 struct intel_display_error_state {
 
 	u32 power_well_driver;
 
+	int num_transcoders;
+
 	struct intel_cursor_error_state {
 		u32 control;
 		u32 position;
@@ -10017,16 +10577,7 @@
 	} cursor[I915_MAX_PIPES];
 
 	struct intel_pipe_error_state {
-		enum transcoder cpu_transcoder;
-		u32 conf;
 		u32 source;
-
-		u32 htotal;
-		u32 hblank;
-		u32 hsync;
-		u32 vtotal;
-		u32 vblank;
-		u32 vsync;
 	} pipe[I915_MAX_PIPES];
 
 	struct intel_plane_error_state {
@@ -10038,6 +10589,19 @@
 		u32 surface;
 		u32 tile_offset;
 	} plane[I915_MAX_PIPES];
+
+	struct intel_transcoder_error_state {
+		enum transcoder cpu_transcoder;
+
+		u32 conf;
+
+		u32 htotal;
+		u32 hblank;
+		u32 hsync;
+		u32 vtotal;
+		u32 vblank;
+		u32 vsync;
+	} transcoder[4];
 };
 
 struct intel_display_error_state *
@@ -10045,9 +10609,17 @@
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_display_error_state *error;
-	enum transcoder cpu_transcoder;
+	int transcoders[] = {
+		TRANSCODER_A,
+		TRANSCODER_B,
+		TRANSCODER_C,
+		TRANSCODER_EDP,
+	};
 	int i;
 
+	if (INTEL_INFO(dev)->num_pipes == 0)
+		return NULL;
+
 	error = kmalloc(sizeof(*error), GFP_ATOMIC);
 	if (error == NULL)
 		return NULL;
@@ -10056,9 +10628,6 @@
 		error->power_well_driver = I915_READ(HSW_PWR_WELL_DRIVER);
 
 	for_each_pipe(i) {
-		cpu_transcoder = intel_pipe_to_cpu_transcoder(dev_priv, i);
-		error->pipe[i].cpu_transcoder = cpu_transcoder;
-
 		if (INTEL_INFO(dev)->gen <= 6 || IS_VALLEYVIEW(dev)) {
 			error->cursor[i].control = I915_READ(CURCNTR(i));
 			error->cursor[i].position = I915_READ(CURPOS(i));
@@ -10082,22 +10651,32 @@
 			error->plane[i].tile_offset = I915_READ(DSPTILEOFF(i));
 		}
 
-		error->pipe[i].conf = I915_READ(PIPECONF(cpu_transcoder));
 		error->pipe[i].source = I915_READ(PIPESRC(i));
-		error->pipe[i].htotal = I915_READ(HTOTAL(cpu_transcoder));
-		error->pipe[i].hblank = I915_READ(HBLANK(cpu_transcoder));
-		error->pipe[i].hsync = I915_READ(HSYNC(cpu_transcoder));
-		error->pipe[i].vtotal = I915_READ(VTOTAL(cpu_transcoder));
-		error->pipe[i].vblank = I915_READ(VBLANK(cpu_transcoder));
-		error->pipe[i].vsync = I915_READ(VSYNC(cpu_transcoder));
+	}
+
+	error->num_transcoders = INTEL_INFO(dev)->num_pipes;
+	if (HAS_DDI(dev_priv->dev))
+		error->num_transcoders++; /* Account for eDP. */
+
+	for (i = 0; i < error->num_transcoders; i++) {
+		enum transcoder cpu_transcoder = transcoders[i];
+
+		error->transcoder[i].cpu_transcoder = cpu_transcoder;
+
+		error->transcoder[i].conf = I915_READ(PIPECONF(cpu_transcoder));
+		error->transcoder[i].htotal = I915_READ(HTOTAL(cpu_transcoder));
+		error->transcoder[i].hblank = I915_READ(HBLANK(cpu_transcoder));
+		error->transcoder[i].hsync = I915_READ(HSYNC(cpu_transcoder));
+		error->transcoder[i].vtotal = I915_READ(VTOTAL(cpu_transcoder));
+		error->transcoder[i].vblank = I915_READ(VBLANK(cpu_transcoder));
+		error->transcoder[i].vsync = I915_READ(VSYNC(cpu_transcoder));
 	}
 
 	/* In the code above we read the registers without checking if the power
 	 * well was on, so here we have to clear the FPGA_DBG_RM_NOCLAIM bit to
 	 * prevent the next I915_WRITE from detecting it and printing an error
 	 * message. */
-	if (HAS_POWER_WELL(dev))
-		I915_WRITE_NOTRACE(FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	intel_uncore_clear_errors(dev);
 
 	return error;
 }
@@ -10111,22 +10690,16 @@
 {
 	int i;
 
+	if (!error)
+		return;
+
 	err_printf(m, "Num Pipes: %d\n", INTEL_INFO(dev)->num_pipes);
 	if (HAS_POWER_WELL(dev))
 		err_printf(m, "PWR_WELL_CTL2: %08x\n",
 			   error->power_well_driver);
 	for_each_pipe(i) {
 		err_printf(m, "Pipe [%d]:\n", i);
-		err_printf(m, "  CPU transcoder: %c\n",
-			   transcoder_name(error->pipe[i].cpu_transcoder));
-		err_printf(m, "  CONF: %08x\n", error->pipe[i].conf);
 		err_printf(m, "  SRC: %08x\n", error->pipe[i].source);
-		err_printf(m, "  HTOTAL: %08x\n", error->pipe[i].htotal);
-		err_printf(m, "  HBLANK: %08x\n", error->pipe[i].hblank);
-		err_printf(m, "  HSYNC: %08x\n", error->pipe[i].hsync);
-		err_printf(m, "  VTOTAL: %08x\n", error->pipe[i].vtotal);
-		err_printf(m, "  VBLANK: %08x\n", error->pipe[i].vblank);
-		err_printf(m, "  VSYNC: %08x\n", error->pipe[i].vsync);
 
 		err_printf(m, "Plane [%d]:\n", i);
 		err_printf(m, "  CNTR: %08x\n", error->plane[i].control);
@@ -10147,5 +10720,16 @@
 		err_printf(m, "  POS: %08x\n", error->cursor[i].position);
 		err_printf(m, "  BASE: %08x\n", error->cursor[i].base);
 	}
+
+	for (i = 0; i < error->num_transcoders; i++) {
+		err_printf(m, "  CPU transcoder: %c\n",
+			   transcoder_name(error->transcoder[i].cpu_transcoder));
+		err_printf(m, "  CONF: %08x\n", error->transcoder[i].conf);
+		err_printf(m, "  HTOTAL: %08x\n", error->transcoder[i].htotal);
+		err_printf(m, "  HBLANK: %08x\n", error->transcoder[i].hblank);
+		err_printf(m, "  HSYNC: %08x\n", error->transcoder[i].hsync);
+		err_printf(m, "  VTOTAL: %08x\n", error->transcoder[i].vtotal);
+		err_printf(m, "  VBLANK: %08x\n", error->transcoder[i].vblank);
+		err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
+	}
 }
-#endif
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 26e162b..2151d13 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -276,6 +276,45 @@
 	return status;
 }
 
+static uint32_t get_aux_clock_divider(struct intel_dp *intel_dp,
+				      int index)
+{
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = intel_dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* The clock divider is based off the hrawclk,
+	 * and would like to run at 2MHz. So, take the
+	 * hrawclk value and divide by 2 and use that
+	 *
+	 * Note that PCH attached eDP panels should use a 125MHz input
+	 * clock divider.
+	 */
+	if (IS_VALLEYVIEW(dev)) {
+		return index ? 0 : 100;
+	} else if (intel_dig_port->port == PORT_A) {
+		if (index)
+			return 0;
+		if (HAS_DDI(dev))
+			return DIV_ROUND_CLOSEST(intel_ddi_get_cdclk_freq(dev_priv), 2000);
+		else if (IS_GEN6(dev) || IS_GEN7(dev))
+			return 200; /* SNB & IVB eDP input clock at 400Mhz */
+		else
+			return 225; /* eDP input clock at 450Mhz */
+	} else if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
+		/* Workaround for non-ULT HSW */
+		switch (index) {
+		case 0: return 63;
+		case 1: return 72;
+		default: return 0;
+		}
+	} else if (HAS_PCH_SPLIT(dev)) {
+		return index ? 0 : DIV_ROUND_UP(intel_pch_rawclk(dev), 2);
+	} else {
+		return index ? 0 :intel_hrawclk(dev) / 2;
+	}
+}
+
 static int
 intel_dp_aux_ch(struct intel_dp *intel_dp,
 		uint8_t *send, int send_bytes,
@@ -286,10 +325,10 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t ch_ctl = intel_dp->aux_ch_ctl_reg;
 	uint32_t ch_data = ch_ctl + 4;
+	uint32_t aux_clock_divider;
 	int i, ret, recv_bytes;
 	uint32_t status;
-	uint32_t aux_clock_divider;
-	int try, precharge;
+	int try, precharge, clock = 0;
 	bool has_aux_irq = INTEL_INFO(dev)->gen >= 5 && !IS_VALLEYVIEW(dev);
 
 	/* dp aux is extremely sensitive to irq latency, hence request the
@@ -299,37 +338,14 @@
 	pm_qos_update_request(&dev_priv->pm_qos, 0);
 
 	intel_dp_check_edp(intel_dp);
-	/* The clock divider is based off the hrawclk,
-	 * and would like to run at 2MHz. So, take the
-	 * hrawclk value and divide by 2 and use that
-	 *
-	 * Note that PCH attached eDP panels should use a 125MHz input
-	 * clock divider.
-	 */
-	if (IS_VALLEYVIEW(dev)) {
-		aux_clock_divider = 100;
-	} else if (intel_dig_port->port == PORT_A) {
-		if (HAS_DDI(dev))
-			aux_clock_divider = DIV_ROUND_CLOSEST(
-				intel_ddi_get_cdclk_freq(dev_priv), 2000);
-		else if (IS_GEN6(dev) || IS_GEN7(dev))
-			aux_clock_divider = 200; /* SNB & IVB eDP input clock at 400Mhz */
-		else
-			aux_clock_divider = 225; /* eDP input clock at 450Mhz */
-	} else if (dev_priv->pch_id == INTEL_PCH_LPT_DEVICE_ID_TYPE) {
-		/* Workaround for non-ULT HSW */
-		aux_clock_divider = 74;
-	} else if (HAS_PCH_SPLIT(dev)) {
-		aux_clock_divider = DIV_ROUND_UP(intel_pch_rawclk(dev), 2);
-	} else {
-		aux_clock_divider = intel_hrawclk(dev) / 2;
-	}
 
 	if (IS_GEN6(dev))
 		precharge = 3;
 	else
 		precharge = 5;
 
+	intel_aux_display_runtime_get(dev_priv);
+
 	/* Try to wait for any previous AUX channel activity */
 	for (try = 0; try < 3; try++) {
 		status = I915_READ_NOTRACE(ch_ctl);
@@ -345,37 +361,41 @@
 		goto out;
 	}
 
-	/* Must try at least 3 times according to DP spec */
-	for (try = 0; try < 5; try++) {
-		/* Load the send data into the aux channel data registers */
-		for (i = 0; i < send_bytes; i += 4)
-			I915_WRITE(ch_data + i,
-				   pack_aux(send + i, send_bytes - i));
+	while ((aux_clock_divider = get_aux_clock_divider(intel_dp, clock++))) {
+		/* Must try at least 3 times according to DP spec */
+		for (try = 0; try < 5; try++) {
+			/* Load the send data into the aux channel data registers */
+			for (i = 0; i < send_bytes; i += 4)
+				I915_WRITE(ch_data + i,
+					   pack_aux(send + i, send_bytes - i));
 
-		/* Send the command and wait for it to complete */
-		I915_WRITE(ch_ctl,
-			   DP_AUX_CH_CTL_SEND_BUSY |
-			   (has_aux_irq ? DP_AUX_CH_CTL_INTERRUPT : 0) |
-			   DP_AUX_CH_CTL_TIME_OUT_400us |
-			   (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) |
-			   (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) |
-			   (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT) |
-			   DP_AUX_CH_CTL_DONE |
-			   DP_AUX_CH_CTL_TIME_OUT_ERROR |
-			   DP_AUX_CH_CTL_RECEIVE_ERROR);
+			/* Send the command and wait for it to complete */
+			I915_WRITE(ch_ctl,
+				   DP_AUX_CH_CTL_SEND_BUSY |
+				   (has_aux_irq ? DP_AUX_CH_CTL_INTERRUPT : 0) |
+				   DP_AUX_CH_CTL_TIME_OUT_400us |
+				   (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) |
+				   (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) |
+				   (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT) |
+				   DP_AUX_CH_CTL_DONE |
+				   DP_AUX_CH_CTL_TIME_OUT_ERROR |
+				   DP_AUX_CH_CTL_RECEIVE_ERROR);
 
-		status = intel_dp_aux_wait_done(intel_dp, has_aux_irq);
+			status = intel_dp_aux_wait_done(intel_dp, has_aux_irq);
 
-		/* Clear done status and any errors */
-		I915_WRITE(ch_ctl,
-			   status |
-			   DP_AUX_CH_CTL_DONE |
-			   DP_AUX_CH_CTL_TIME_OUT_ERROR |
-			   DP_AUX_CH_CTL_RECEIVE_ERROR);
+			/* Clear done status and any errors */
+			I915_WRITE(ch_ctl,
+				   status |
+				   DP_AUX_CH_CTL_DONE |
+				   DP_AUX_CH_CTL_TIME_OUT_ERROR |
+				   DP_AUX_CH_CTL_RECEIVE_ERROR);
 
-		if (status & (DP_AUX_CH_CTL_TIME_OUT_ERROR |
-			      DP_AUX_CH_CTL_RECEIVE_ERROR))
-			continue;
+			if (status & (DP_AUX_CH_CTL_TIME_OUT_ERROR |
+				      DP_AUX_CH_CTL_RECEIVE_ERROR))
+				continue;
+			if (status & DP_AUX_CH_CTL_DONE)
+				break;
+		}
 		if (status & DP_AUX_CH_CTL_DONE)
 			break;
 	}
@@ -416,6 +436,7 @@
 	ret = recv_bytes;
 out:
 	pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
+	intel_aux_display_runtime_put(dev_priv);
 
 	return ret;
 }
@@ -710,8 +731,11 @@
 	/* Walk through all bpp values. Luckily they're all nicely spaced with 2
 	 * bpc in between. */
 	bpp = pipe_config->pipe_bpp;
-	if (is_edp(intel_dp) && dev_priv->vbt.edp_bpp)
+	if (is_edp(intel_dp) && dev_priv->vbt.edp_bpp) {
+		DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n",
+			      dev_priv->vbt.edp_bpp);
 		bpp = min_t(int, bpp, dev_priv->vbt.edp_bpp);
+	}
 
 	for (; bpp >= 6*3; bpp -= 2*3) {
 		mode_rate = intel_dp_link_required(adjusted_mode->clock, bpp);
@@ -812,15 +836,14 @@
 	udelay(500);
 }
 
-static void
-intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
-		  struct drm_display_mode *adjusted_mode)
+static void intel_dp_mode_set(struct intel_encoder *encoder)
 {
-	struct drm_device *dev = encoder->dev;
+	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
 	enum port port = dp_to_dig_port(intel_dp)->port;
-	struct intel_crtc *crtc = to_intel_crtc(encoder->crtc);
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+	struct drm_display_mode *adjusted_mode = &crtc->config.adjusted_mode;
 
 	/*
 	 * There are four kinds of DP registers:
@@ -852,7 +875,7 @@
 		DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n",
 				 pipe_name(crtc->pipe));
 		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
-		intel_write_eld(encoder, adjusted_mode);
+		intel_write_eld(&encoder->base, adjusted_mode);
 	}
 
 	intel_dp_init_link_config(intel_dp);
@@ -1360,6 +1383,275 @@
 	}
 
 	pipe_config->adjusted_mode.flags |= flags;
+
+	if (dp_to_dig_port(intel_dp)->port == PORT_A) {
+		if ((I915_READ(DP_A) & DP_PLL_FREQ_MASK) == DP_PLL_FREQ_160MHZ)
+			pipe_config->port_clock = 162000;
+		else
+			pipe_config->port_clock = 270000;
+	}
+}
+
+static bool is_edp_psr(struct intel_dp *intel_dp)
+{
+	return is_edp(intel_dp) &&
+		intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED;
+}
+
+static bool intel_edp_is_psr_enabled(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!IS_HASWELL(dev))
+		return false;
+
+	return I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE;
+}
+
+static void intel_edp_psr_write_vsc(struct intel_dp *intel_dp,
+				    struct edp_vsc_psr *vsc_psr)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
+	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(crtc->config.cpu_transcoder);
+	u32 data_reg = HSW_TVIDEO_DIP_VSC_DATA(crtc->config.cpu_transcoder);
+	uint32_t *data = (uint32_t *) vsc_psr;
+	unsigned int i;
+
+	/* As per BSPec (Pipe Video Data Island Packet), we need to disable
+	   the video DIP being updated before program video DIP data buffer
+	   registers for DIP being updated. */
+	I915_WRITE(ctl_reg, 0);
+	POSTING_READ(ctl_reg);
+
+	for (i = 0; i < VIDEO_DIP_VSC_DATA_SIZE; i += 4) {
+		if (i < sizeof(struct edp_vsc_psr))
+			I915_WRITE(data_reg + i, *data++);
+		else
+			I915_WRITE(data_reg + i, 0);
+	}
+
+	I915_WRITE(ctl_reg, VIDEO_DIP_ENABLE_VSC_HSW);
+	POSTING_READ(ctl_reg);
+}
+
+static void intel_edp_psr_setup(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct edp_vsc_psr psr_vsc;
+
+	if (intel_dp->psr_setup_done)
+		return;
+
+	/* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */
+	memset(&psr_vsc, 0, sizeof(psr_vsc));
+	psr_vsc.sdp_header.HB0 = 0;
+	psr_vsc.sdp_header.HB1 = 0x7;
+	psr_vsc.sdp_header.HB2 = 0x2;
+	psr_vsc.sdp_header.HB3 = 0x8;
+	intel_edp_psr_write_vsc(intel_dp, &psr_vsc);
+
+	/* Avoid continuous PSR exit by masking memup and hpd */
+	I915_WRITE(EDP_PSR_DEBUG_CTL, EDP_PSR_DEBUG_MASK_MEMUP |
+		   EDP_PSR_DEBUG_MASK_HPD);
+
+	intel_dp->psr_setup_done = true;
+}
+
+static void intel_edp_psr_enable_sink(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t aux_clock_divider = get_aux_clock_divider(intel_dp, 0);
+	int precharge = 0x3;
+	int msg_size = 5;       /* Header(4) + Message(1) */
+
+	/* Enable PSR in sink */
+	if (intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT)
+		intel_dp_aux_native_write_1(intel_dp, DP_PSR_EN_CFG,
+					    DP_PSR_ENABLE &
+					    ~DP_PSR_MAIN_LINK_ACTIVE);
+	else
+		intel_dp_aux_native_write_1(intel_dp, DP_PSR_EN_CFG,
+					    DP_PSR_ENABLE |
+					    DP_PSR_MAIN_LINK_ACTIVE);
+
+	/* Setup AUX registers */
+	I915_WRITE(EDP_PSR_AUX_DATA1, EDP_PSR_DPCD_COMMAND);
+	I915_WRITE(EDP_PSR_AUX_DATA2, EDP_PSR_DPCD_NORMAL_OPERATION);
+	I915_WRITE(EDP_PSR_AUX_CTL,
+		   DP_AUX_CH_CTL_TIME_OUT_400us |
+		   (msg_size << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) |
+		   (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) |
+		   (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT));
+}
+
+static void intel_edp_psr_enable_source(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t max_sleep_time = 0x1f;
+	uint32_t idle_frames = 1;
+	uint32_t val = 0x0;
+
+	if (intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT) {
+		val |= EDP_PSR_LINK_STANDBY;
+		val |= EDP_PSR_TP2_TP3_TIME_0us;
+		val |= EDP_PSR_TP1_TIME_0us;
+		val |= EDP_PSR_SKIP_AUX_EXIT;
+	} else
+		val |= EDP_PSR_LINK_DISABLE;
+
+	I915_WRITE(EDP_PSR_CTL, val |
+		   EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES |
+		   max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT |
+		   idle_frames << EDP_PSR_IDLE_FRAME_SHIFT |
+		   EDP_PSR_ENABLE);
+}
+
+static bool intel_edp_psr_match_conditions(struct intel_dp *intel_dp)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_device *dev = dig_port->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_crtc *crtc = dig_port->base.base.crtc;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	struct drm_i915_gem_object *obj = to_intel_framebuffer(crtc->fb)->obj;
+	struct intel_encoder *intel_encoder = &dp_to_dig_port(intel_dp)->base;
+
+	if (!IS_HASWELL(dev)) {
+		DRM_DEBUG_KMS("PSR not supported on this platform\n");
+		dev_priv->no_psr_reason = PSR_NO_SOURCE;
+		return false;
+	}
+
+	if ((intel_encoder->type != INTEL_OUTPUT_EDP) ||
+	    (dig_port->port != PORT_A)) {
+		DRM_DEBUG_KMS("HSW ties PSR to DDI A (eDP)\n");
+		dev_priv->no_psr_reason = PSR_HSW_NOT_DDIA;
+		return false;
+	}
+
+	if (!is_edp_psr(intel_dp)) {
+		DRM_DEBUG_KMS("PSR not supported by this panel\n");
+		dev_priv->no_psr_reason = PSR_NO_SINK;
+		return false;
+	}
+
+	if (!i915_enable_psr) {
+		DRM_DEBUG_KMS("PSR disable by flag\n");
+		dev_priv->no_psr_reason = PSR_MODULE_PARAM;
+		return false;
+	}
+
+	crtc = dig_port->base.base.crtc;
+	if (crtc == NULL) {
+		DRM_DEBUG_KMS("crtc not active for PSR\n");
+		dev_priv->no_psr_reason = PSR_CRTC_NOT_ACTIVE;
+		return false;
+	}
+
+	intel_crtc = to_intel_crtc(crtc);
+	if (!intel_crtc->active || !crtc->fb || !crtc->mode.clock) {
+		DRM_DEBUG_KMS("crtc not active for PSR\n");
+		dev_priv->no_psr_reason = PSR_CRTC_NOT_ACTIVE;
+		return false;
+	}
+
+	obj = to_intel_framebuffer(crtc->fb)->obj;
+	if (obj->tiling_mode != I915_TILING_X ||
+	    obj->fence_reg == I915_FENCE_REG_NONE) {
+		DRM_DEBUG_KMS("PSR condition failed: fb not tiled or fenced\n");
+		dev_priv->no_psr_reason = PSR_NOT_TILED;
+		return false;
+	}
+
+	if (I915_READ(SPRCTL(intel_crtc->pipe)) & SPRITE_ENABLE) {
+		DRM_DEBUG_KMS("PSR condition failed: Sprite is Enabled\n");
+		dev_priv->no_psr_reason = PSR_SPRITE_ENABLED;
+		return false;
+	}
+
+	if (I915_READ(HSW_STEREO_3D_CTL(intel_crtc->config.cpu_transcoder)) &
+	    S3D_ENABLE) {
+		DRM_DEBUG_KMS("PSR condition failed: Stereo 3D is Enabled\n");
+		dev_priv->no_psr_reason = PSR_S3D_ENABLED;
+		return false;
+	}
+
+	if (crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) {
+		DRM_DEBUG_KMS("PSR condition failed: Interlaced is Enabled\n");
+		dev_priv->no_psr_reason = PSR_INTERLACED_ENABLED;
+		return false;
+	}
+
+	return true;
+}
+
+static void intel_edp_psr_do_enable(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+
+	if (!intel_edp_psr_match_conditions(intel_dp) ||
+	    intel_edp_is_psr_enabled(dev))
+		return;
+
+	/* Setup PSR once */
+	intel_edp_psr_setup(intel_dp);
+
+	/* Enable PSR on the panel */
+	intel_edp_psr_enable_sink(intel_dp);
+
+	/* Enable PSR on the host */
+	intel_edp_psr_enable_source(intel_dp);
+}
+
+void intel_edp_psr_enable(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+
+	if (intel_edp_psr_match_conditions(intel_dp) &&
+	    !intel_edp_is_psr_enabled(dev))
+		intel_edp_psr_do_enable(intel_dp);
+}
+
+void intel_edp_psr_disable(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!intel_edp_is_psr_enabled(dev))
+		return;
+
+	I915_WRITE(EDP_PSR_CTL, I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE);
+
+	/* Wait till PSR is idle */
+	if (_wait_for((I915_READ(EDP_PSR_STATUS_CTL) &
+		       EDP_PSR_STATUS_STATE_MASK) == 0, 2000, 10))
+		DRM_ERROR("Timed out waiting for PSR Idle State\n");
+}
+
+void intel_edp_psr_update(struct drm_device *dev)
+{
+	struct intel_encoder *encoder;
+	struct intel_dp *intel_dp = NULL;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head)
+		if (encoder->type == INTEL_OUTPUT_EDP) {
+			intel_dp = enc_to_intel_dp(&encoder->base);
+
+			if (!is_edp_psr(intel_dp))
+				return;
+
+			if (!intel_edp_psr_match_conditions(intel_dp))
+				intel_edp_psr_disable(intel_dp);
+			else
+				if (!intel_edp_is_psr_enabled(dev))
+					intel_edp_psr_do_enable(intel_dp);
+		}
 }
 
 static void intel_disable_dp(struct intel_encoder *encoder)
@@ -1411,47 +1703,50 @@
 	intel_dp_complete_link_train(intel_dp);
 	intel_dp_stop_link_train(intel_dp);
 	ironlake_edp_backlight_on(intel_dp);
+}
 
-	if (IS_VALLEYVIEW(dev)) {
-		struct intel_digital_port *dport =
-			enc_to_dig_port(&encoder->base);
-		int channel = vlv_dport_to_channel(dport);
-
-		vlv_wait_port_ready(dev_priv, channel);
-	}
+static void vlv_enable_dp(struct intel_encoder *encoder)
+{
 }
 
 static void intel_pre_enable_dp(struct intel_encoder *encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
 	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
+
+	if (dport->port == PORT_A)
+		ironlake_edp_pll_on(intel_dp);
+}
+
+static void vlv_pre_enable_dp(struct intel_encoder *encoder)
+{
+	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
+	struct intel_digital_port *dport = dp_to_dig_port(intel_dp);
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
+	int port = vlv_dport_to_channel(dport);
+	int pipe = intel_crtc->pipe;
+	u32 val;
 
-	if (dport->port == PORT_A && !IS_VALLEYVIEW(dev))
-		ironlake_edp_pll_on(intel_dp);
+	mutex_lock(&dev_priv->dpio_lock);
 
-	if (IS_VALLEYVIEW(dev)) {
-		struct intel_crtc *intel_crtc =
-			to_intel_crtc(encoder->base.crtc);
-		int port = vlv_dport_to_channel(dport);
-		int pipe = intel_crtc->pipe;
-		u32 val;
+	val = vlv_dpio_read(dev_priv, DPIO_DATA_LANE_A(port));
+	val = 0;
+	if (pipe)
+		val |= (1<<21);
+	else
+		val &= ~(1<<21);
+	val |= 0x001000c4;
+	vlv_dpio_write(dev_priv, DPIO_DATA_CHANNEL(port), val);
+	vlv_dpio_write(dev_priv, DPIO_PCS_CLOCKBUF0(port), 0x00760018);
+	vlv_dpio_write(dev_priv, DPIO_PCS_CLOCKBUF8(port), 0x00400888);
 
-		val = vlv_dpio_read(dev_priv, DPIO_DATA_LANE_A(port));
-		val = 0;
-		if (pipe)
-			val |= (1<<21);
-		else
-			val &= ~(1<<21);
-		val |= 0x001000c4;
-		vlv_dpio_write(dev_priv, DPIO_DATA_CHANNEL(port), val);
+	mutex_unlock(&dev_priv->dpio_lock);
 
-		vlv_dpio_write(dev_priv, DPIO_PCS_CLOCKBUF0(port),
-				 0x00760018);
-		vlv_dpio_write(dev_priv, DPIO_PCS_CLOCKBUF8(port),
-				 0x00400888);
-	}
+	intel_enable_dp(encoder);
+
+	vlv_wait_port_ready(dev_priv, port);
 }
 
 static void intel_dp_pre_pll_enable(struct intel_encoder *encoder)
@@ -1465,6 +1760,7 @@
 		return;
 
 	/* Program Tx lane resets to default */
+	mutex_lock(&dev_priv->dpio_lock);
 	vlv_dpio_write(dev_priv, DPIO_PCS_TX(port),
 			 DPIO_PCS_TX_LANE2_RESET |
 			 DPIO_PCS_TX_LANE1_RESET);
@@ -1478,6 +1774,7 @@
 	vlv_dpio_write(dev_priv, DPIO_PCS_STAGGER1(port), 0x00750f00);
 	vlv_dpio_write(dev_priv, DPIO_TX_CTL(port), 0x00001500);
 	vlv_dpio_write(dev_priv, DPIO_TX_LANE(port), 0x40400000);
+	mutex_unlock(&dev_priv->dpio_lock);
 }
 
 /*
@@ -1689,6 +1986,7 @@
 		return 0;
 	}
 
+	mutex_lock(&dev_priv->dpio_lock);
 	vlv_dpio_write(dev_priv, DPIO_TX_OCALINIT(port), 0x00000000);
 	vlv_dpio_write(dev_priv, DPIO_TX_SWING_CTL4(port), demph_reg_value);
 	vlv_dpio_write(dev_priv, DPIO_TX_SWING_CTL2(port),
@@ -1697,6 +1995,7 @@
 	vlv_dpio_write(dev_priv, DPIO_PCS_STAGGER0(port), 0x00030000);
 	vlv_dpio_write(dev_priv, DPIO_PCS_CTL_OVER1(port), preemph_reg_value);
 	vlv_dpio_write(dev_priv, DPIO_TX_OCALINIT(port), 0x80000000);
+	mutex_unlock(&dev_priv->dpio_lock);
 
 	return 0;
 }
@@ -2030,7 +2329,6 @@
 	struct drm_device *dev = encoder->dev;
 	int i;
 	uint8_t voltage;
-	bool clock_recovery = false;
 	int voltage_tries, loop_tries;
 	uint32_t DP = intel_dp->DP;
 
@@ -2048,7 +2346,6 @@
 	voltage = 0xff;
 	voltage_tries = 0;
 	loop_tries = 0;
-	clock_recovery = false;
 	for (;;) {
 		/* Use intel_dp->train_set[0] to set the voltage and pre emphasis values */
 		uint8_t	    link_status[DP_LINK_STATUS_SIZE];
@@ -2069,7 +2366,6 @@
 
 		if (drm_dp_clock_recovery_ok(link_status, intel_dp->lane_count)) {
 			DRM_DEBUG_KMS("clock recovery OK\n");
-			clock_recovery = true;
 			break;
 		}
 
@@ -2275,6 +2571,13 @@
 	if (intel_dp->dpcd[DP_DPCD_REV] == 0)
 		return false; /* DPCD not present */
 
+	/* Check if the panel supports PSR */
+	memset(intel_dp->psr_dpcd, 0, sizeof(intel_dp->psr_dpcd));
+	intel_dp_aux_native_read_retry(intel_dp, DP_PSR_SUPPORT,
+				       intel_dp->psr_dpcd,
+				       sizeof(intel_dp->psr_dpcd));
+	if (is_edp_psr(intel_dp))
+		DRM_DEBUG_KMS("Detected EDP PSR Panel.\n");
 	if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
 	      DP_DWN_STRM_PORT_PRESENT))
 		return true; /* native DP sink */
@@ -2542,6 +2845,9 @@
 	enum drm_connector_status status;
 	struct edid *edid = NULL;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
+		      connector->base.id, drm_get_connector_name(connector));
+
 	intel_dp->has_audio = false;
 
 	if (HAS_PCH_SPLIT(dev))
@@ -2735,10 +3041,6 @@
 	kfree(intel_dig_port);
 }
 
-static const struct drm_encoder_helper_funcs intel_dp_helper_funcs = {
-	.mode_set = intel_dp_mode_set,
-};
-
 static const struct drm_connector_funcs intel_dp_connector_funcs = {
 	.dpms = intel_connector_dpms,
 	.detect = intel_dp_detect,
@@ -3166,6 +3468,8 @@
 	WARN(error, "intel_dp_i2c_init failed with error %d for port %c\n",
 	     error, port_name(port));
 
+	intel_dp->psr_setup_done = false;
+
 	if (!intel_edp_init_connector(intel_dp, intel_connector)) {
 		i2c_del_adapter(&intel_dp->adapter);
 		if (is_edp(intel_dp)) {
@@ -3216,17 +3520,21 @@
 
 	drm_encoder_init(dev, &intel_encoder->base, &intel_dp_enc_funcs,
 			 DRM_MODE_ENCODER_TMDS);
-	drm_encoder_helper_add(&intel_encoder->base, &intel_dp_helper_funcs);
 
 	intel_encoder->compute_config = intel_dp_compute_config;
-	intel_encoder->enable = intel_enable_dp;
-	intel_encoder->pre_enable = intel_pre_enable_dp;
+	intel_encoder->mode_set = intel_dp_mode_set;
 	intel_encoder->disable = intel_disable_dp;
 	intel_encoder->post_disable = intel_post_disable_dp;
 	intel_encoder->get_hw_state = intel_dp_get_hw_state;
 	intel_encoder->get_config = intel_dp_get_config;
-	if (IS_VALLEYVIEW(dev))
+	if (IS_VALLEYVIEW(dev)) {
 		intel_encoder->pre_pll_enable = intel_dp_pre_pll_enable;
+		intel_encoder->pre_enable = vlv_pre_enable_dp;
+		intel_encoder->enable = vlv_enable_dp;
+	} else {
+		intel_encoder->pre_enable = intel_pre_enable_dp;
+		intel_encoder->enable = intel_enable_dp;
+	}
 
 	intel_dig_port->port = port;
 	intel_dig_port->dp.output_reg = output_reg;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index c8c9b6f..1760808 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -26,6 +26,7 @@
 #define __INTEL_DRV_H__
 
 #include <linux/i2c.h>
+#include <linux/hdmi.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include <drm/drm_crtc.h>
@@ -208,10 +209,6 @@
 
 	struct drm_display_mode requested_mode;
 	struct drm_display_mode adjusted_mode;
-	/* This flag must be set by the encoder's compute_config callback if it
-	 * changes the crtc timings in the mode to prevent the crtc fixup from
-	 * overwriting them.  Currently only lvds needs that. */
-	bool timings_set;
 	/* Whether to set up the PCH/FDI. Note that we never allow sharing
 	 * between pch encoders and cpu encoders. */
 	bool has_pch_encoder;
@@ -334,6 +331,13 @@
 	bool pch_fifo_underrun_disabled;
 };
 
+struct intel_plane_wm_parameters {
+	uint32_t horiz_pixels;
+	uint8_t bytes_per_pixel;
+	bool enabled;
+	bool scaled;
+};
+
 struct intel_plane {
 	struct drm_plane base;
 	int plane;
@@ -352,20 +356,18 @@
 	 * as the other pieces of the struct may not reflect the values we want
 	 * for the watermark calculations. Currently only Haswell uses this.
 	 */
-	struct {
-		bool enable;
-		uint8_t bytes_per_pixel;
-		uint32_t horiz_pixels;
-	} wm;
+	struct intel_plane_wm_parameters wm;
 
 	void (*update_plane)(struct drm_plane *plane,
+			     struct drm_crtc *crtc,
 			     struct drm_framebuffer *fb,
 			     struct drm_i915_gem_object *obj,
 			     int crtc_x, int crtc_y,
 			     unsigned int crtc_w, unsigned int crtc_h,
 			     uint32_t x, uint32_t y,
 			     uint32_t src_w, uint32_t src_h);
-	void (*disable_plane)(struct drm_plane *plane);
+	void (*disable_plane)(struct drm_plane *plane,
+			      struct drm_crtc *crtc);
 	int (*update_colorkey)(struct drm_plane *plane,
 			       struct drm_intel_sprite_colorkey *key);
 	void (*get_colorkey)(struct drm_plane *plane,
@@ -397,66 +399,6 @@
 #define to_intel_framebuffer(x) container_of(x, struct intel_framebuffer, base)
 #define to_intel_plane(x) container_of(x, struct intel_plane, base)
 
-#define DIP_HEADER_SIZE	5
-
-#define DIP_TYPE_AVI    0x82
-#define DIP_VERSION_AVI 0x2
-#define DIP_LEN_AVI     13
-#define DIP_AVI_PR_1    0
-#define DIP_AVI_PR_2    1
-#define DIP_AVI_RGB_QUANT_RANGE_DEFAULT	(0 << 2)
-#define DIP_AVI_RGB_QUANT_RANGE_LIMITED	(1 << 2)
-#define DIP_AVI_RGB_QUANT_RANGE_FULL	(2 << 2)
-
-#define DIP_TYPE_SPD	0x83
-#define DIP_VERSION_SPD	0x1
-#define DIP_LEN_SPD	25
-#define DIP_SPD_UNKNOWN	0
-#define DIP_SPD_DSTB	0x1
-#define DIP_SPD_DVDP	0x2
-#define DIP_SPD_DVHS	0x3
-#define DIP_SPD_HDDVR	0x4
-#define DIP_SPD_DVC	0x5
-#define DIP_SPD_DSC	0x6
-#define DIP_SPD_VCD	0x7
-#define DIP_SPD_GAME	0x8
-#define DIP_SPD_PC	0x9
-#define DIP_SPD_BD	0xa
-#define DIP_SPD_SCD	0xb
-
-struct dip_infoframe {
-	uint8_t type;		/* HB0 */
-	uint8_t ver;		/* HB1 */
-	uint8_t len;		/* HB2 - body len, not including checksum */
-	uint8_t ecc;		/* Header ECC */
-	uint8_t checksum;	/* PB0 */
-	union {
-		struct {
-			/* PB1 - Y 6:5, A 4:4, B 3:2, S 1:0 */
-			uint8_t Y_A_B_S;
-			/* PB2 - C 7:6, M 5:4, R 3:0 */
-			uint8_t C_M_R;
-			/* PB3 - ITC 7:7, EC 6:4, Q 3:2, SC 1:0 */
-			uint8_t ITC_EC_Q_SC;
-			/* PB4 - VIC 6:0 */
-			uint8_t VIC;
-			/* PB5 - YQ 7:6, CN 5:4, PR 3:0 */
-			uint8_t YQ_CN_PR;
-			/* PB6 to PB13 */
-			uint16_t top_bar_end;
-			uint16_t bottom_bar_start;
-			uint16_t left_bar_end;
-			uint16_t right_bar_start;
-		} __attribute__ ((packed)) avi;
-		struct {
-			uint8_t vn[8];
-			uint8_t pd[16];
-			uint8_t sdi;
-		} __attribute__ ((packed)) spd;
-		uint8_t payload[27];
-	} __attribute__ ((packed)) body;
-} __attribute__((packed));
-
 struct intel_hdmi {
 	u32 hdmi_reg;
 	int ddc_bus;
@@ -467,7 +409,8 @@
 	enum hdmi_force_audio force_audio;
 	bool rgb_quant_range_selectable;
 	void (*write_infoframe)(struct drm_encoder *encoder,
-				struct dip_infoframe *frame);
+				enum hdmi_infoframe_type type,
+				const uint8_t *frame, ssize_t len);
 	void (*set_infoframes)(struct drm_encoder *encoder,
 			       struct drm_display_mode *adjusted_mode);
 };
@@ -487,6 +430,7 @@
 	uint8_t link_bw;
 	uint8_t lane_count;
 	uint8_t dpcd[DP_RECEIVER_CAP_SIZE];
+	uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE];
 	uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
 	struct i2c_adapter adapter;
 	struct i2c_algo_dp_aux_data algo;
@@ -498,13 +442,14 @@
 	int backlight_off_delay;
 	struct delayed_work panel_vdd_work;
 	bool want_panel_vdd;
+	bool psr_setup_done;
 	struct intel_connector *attached_connector;
 };
 
 struct intel_digital_port {
 	struct intel_encoder base;
 	enum port port;
-	u32 port_reversal;
+	u32 saved_port_bits;
 	struct intel_dp dp;
 	struct intel_hdmi hdmi;
 };
@@ -549,13 +494,6 @@
 	bool enable_stall_check;
 };
 
-struct intel_fbc_work {
-	struct delayed_work work;
-	struct drm_crtc *crtc;
-	struct drm_framebuffer *fb;
-	int interval;
-};
-
 int intel_pch_rawclk(struct drm_device *dev);
 
 int intel_connector_update_modes(struct drm_connector *connector,
@@ -574,7 +512,6 @@
 extern struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
 extern bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 				      struct intel_crtc_config *pipe_config);
-extern void intel_dip_infoframe_csum(struct dip_infoframe *avi_if);
 extern bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg,
 			    bool is_sdvob);
 extern void intel_dvo_init(struct drm_device *dev);
@@ -639,14 +576,10 @@
 	bool mode_changed;
 };
 
-extern int intel_set_mode(struct drm_crtc *crtc, struct drm_display_mode *mode,
-			  int x, int y, struct drm_framebuffer *old_fb);
-extern void intel_modeset_disable(struct drm_device *dev);
 extern void intel_crtc_restore_mode(struct drm_crtc *crtc);
 extern void intel_crtc_load_lut(struct drm_crtc *crtc);
 extern void intel_crtc_update_dpms(struct drm_crtc *crtc);
 extern void intel_encoder_destroy(struct drm_encoder *encoder);
-extern void intel_encoder_dpms(struct intel_encoder *encoder, int mode);
 extern void intel_connector_dpms(struct drm_connector *, int mode);
 extern bool intel_connector_get_hw_state(struct intel_connector *connector);
 extern void intel_modeset_check_state(struct drm_device *dev);
@@ -712,12 +645,10 @@
 extern void intel_release_load_detect_pipe(struct drm_connector *connector,
 					   struct intel_load_detect_pipe *old);
 
-extern void intelfb_restore(void);
 extern void intel_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
 				    u16 blue, int regno);
 extern void intel_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
 				    u16 *blue, int regno);
-extern void intel_enable_clock_gating(struct drm_device *dev);
 
 extern int intel_pin_and_fence_fb_obj(struct drm_device *dev,
 				      struct drm_i915_gem_object *obj,
@@ -728,6 +659,7 @@
 				  struct intel_framebuffer *ifb,
 				  struct drm_mode_fb_cmd2 *mode_cmd,
 				  struct drm_i915_gem_object *obj);
+extern void intel_framebuffer_fini(struct intel_framebuffer *fb);
 extern int intel_fbdev_init(struct drm_device *dev);
 extern void intel_fbdev_initial_config(struct drm_device *dev);
 extern void intel_fbdev_fini(struct drm_device *dev);
@@ -747,6 +679,22 @@
 extern void intel_fb_output_poll_changed(struct drm_device *dev);
 extern void intel_fb_restore_mode(struct drm_device *dev);
 
+struct intel_shared_dpll *
+intel_crtc_to_shared_dpll(struct intel_crtc *crtc);
+
+void assert_shared_dpll(struct drm_i915_private *dev_priv,
+			struct intel_shared_dpll *pll,
+			bool state);
+#define assert_shared_dpll_enabled(d, p) assert_shared_dpll(d, p, true)
+#define assert_shared_dpll_disabled(d, p) assert_shared_dpll(d, p, false)
+void assert_pll(struct drm_i915_private *dev_priv,
+		enum pipe pipe, bool state);
+#define assert_pll_enabled(d, p) assert_pll(d, p, true)
+#define assert_pll_disabled(d, p) assert_pll(d, p, false)
+void assert_fdi_rx_pll(struct drm_i915_private *dev_priv,
+		       enum pipe pipe, bool state);
+#define assert_fdi_rx_pll_enabled(d, p) assert_fdi_rx_pll(d, p, true)
+#define assert_fdi_rx_pll_disabled(d, p) assert_fdi_rx_pll(d, p, false)
 extern void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe,
 			bool state);
 #define assert_pipe_enabled(d, p) assert_pipe(d, p, true)
@@ -762,9 +710,10 @@
 
 /* For use by IVB LP watermark workaround in intel_sprite.c */
 extern void intel_update_watermarks(struct drm_device *dev);
-extern void intel_update_sprite_watermarks(struct drm_device *dev, int pipe,
-					   uint32_t sprite_width,
-					   int pixel_size, bool enable);
+extern void intel_update_sprite_watermarks(struct drm_plane *plane,
+					   struct drm_crtc *crtc,
+					   uint32_t sprite_width, int pixel_size,
+					   bool enabled, bool scaled);
 
 extern unsigned long intel_gen4_compute_page_offset(int *x, int *y,
 						    unsigned int tiling_mode,
@@ -780,7 +729,6 @@
 extern void intel_init_pm(struct drm_device *dev);
 /* FBC */
 extern bool intel_fbc_enabled(struct drm_device *dev);
-extern void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval);
 extern void intel_update_fbc(struct drm_device *dev);
 /* IPS */
 extern void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
@@ -796,8 +744,8 @@
 extern void intel_set_power_well(struct drm_device *dev, bool enable);
 extern void intel_enable_gt_powersave(struct drm_device *dev);
 extern void intel_disable_gt_powersave(struct drm_device *dev);
-extern void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv);
 extern void ironlake_teardown_rc6(struct drm_device *dev);
+void gen6_update_ring_freq(struct drm_device *dev);
 
 extern bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
 				   enum pipe *pipe);
@@ -825,4 +773,24 @@
 						 enum transcoder pch_transcoder,
 						 bool enable);
 
+extern void intel_edp_psr_enable(struct intel_dp *intel_dp);
+extern void intel_edp_psr_disable(struct intel_dp *intel_dp);
+extern void intel_edp_psr_update(struct drm_device *dev);
+extern void hsw_disable_lcpll(struct drm_i915_private *dev_priv,
+			      bool switch_to_fclk, bool allow_power_down);
+extern void hsw_restore_lcpll(struct drm_i915_private *dev_priv);
+extern void ilk_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask);
+extern void ilk_disable_gt_irq(struct drm_i915_private *dev_priv,
+			       uint32_t mask);
+extern void snb_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask);
+extern void snb_disable_pm_irq(struct drm_i915_private *dev_priv,
+			       uint32_t mask);
+extern void hsw_enable_pc8_work(struct work_struct *__work);
+extern void hsw_enable_package_c8(struct drm_i915_private *dev_priv);
+extern void hsw_disable_package_c8(struct drm_i915_private *dev_priv);
+extern void hsw_pc8_disable_interrupts(struct drm_device *dev);
+extern void hsw_pc8_restore_interrupts(struct drm_device *dev);
+extern void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv);
+extern void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv);
+
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c
index eb2020e..406303b 100644
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -100,15 +100,14 @@
 	bool panel_wants_dither;
 };
 
-static struct intel_dvo *enc_to_intel_dvo(struct drm_encoder *encoder)
+static struct intel_dvo *enc_to_dvo(struct intel_encoder *encoder)
 {
-	return container_of(encoder, struct intel_dvo, base.base);
+	return container_of(encoder, struct intel_dvo, base);
 }
 
 static struct intel_dvo *intel_attached_dvo(struct drm_connector *connector)
 {
-	return container_of(intel_attached_encoder(connector),
-			    struct intel_dvo, base);
+	return enc_to_dvo(intel_attached_encoder(connector));
 }
 
 static bool intel_dvo_connector_get_hw_state(struct intel_connector *connector)
@@ -123,7 +122,7 @@
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_dvo *intel_dvo = enc_to_intel_dvo(&encoder->base);
+	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	u32 tmp;
 
 	tmp = I915_READ(intel_dvo->dev.dvo_reg);
@@ -140,7 +139,7 @@
 				 struct intel_crtc_config *pipe_config)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
-	struct intel_dvo *intel_dvo = enc_to_intel_dvo(&encoder->base);
+	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	u32 tmp, flags = 0;
 
 	tmp = I915_READ(intel_dvo->dev.dvo_reg);
@@ -159,7 +158,7 @@
 static void intel_disable_dvo(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
-	struct intel_dvo *intel_dvo = enc_to_intel_dvo(&encoder->base);
+	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	u32 dvo_reg = intel_dvo->dev.dvo_reg;
 	u32 temp = I915_READ(dvo_reg);
 
@@ -171,7 +170,7 @@
 static void intel_enable_dvo(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
-	struct intel_dvo *intel_dvo = enc_to_intel_dvo(&encoder->base);
+	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
 	u32 dvo_reg = intel_dvo->dev.dvo_reg;
 	u32 temp = I915_READ(dvo_reg);
 
@@ -241,11 +240,11 @@
 	return intel_dvo->dev.dev_ops->mode_valid(&intel_dvo->dev, mode);
 }
 
-static bool intel_dvo_mode_fixup(struct drm_encoder *encoder,
-				 const struct drm_display_mode *mode,
-				 struct drm_display_mode *adjusted_mode)
+static bool intel_dvo_compute_config(struct intel_encoder *encoder,
+				     struct intel_crtc_config *pipe_config)
 {
-	struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder);
+	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
+	struct drm_display_mode *adjusted_mode = &pipe_config->adjusted_mode;
 
 	/* If we have timings from the BIOS for the panel, put them in
 	 * to the adjusted mode.  The CRTC will be set up for this mode,
@@ -267,23 +266,23 @@
 	}
 
 	if (intel_dvo->dev.dev_ops->mode_fixup)
-		return intel_dvo->dev.dev_ops->mode_fixup(&intel_dvo->dev, mode, adjusted_mode);
+		return intel_dvo->dev.dev_ops->mode_fixup(&intel_dvo->dev,
+							  &pipe_config->requested_mode,
+							  adjusted_mode);
 
 	return true;
 }
 
-static void intel_dvo_mode_set(struct drm_encoder *encoder,
-			       struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted_mode)
+static void intel_dvo_mode_set(struct intel_encoder *encoder)
 {
-	struct drm_device *dev = encoder->dev;
+	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder);
-	int pipe = intel_crtc->pipe;
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+	struct drm_display_mode *adjusted_mode = &crtc->config.adjusted_mode;
+	struct intel_dvo *intel_dvo = enc_to_dvo(encoder);
+	int pipe = crtc->pipe;
 	u32 dvo_val;
 	u32 dvo_reg = intel_dvo->dev.dvo_reg, dvo_srcdim_reg;
-	int dpll_reg = DPLL(pipe);
 
 	switch (dvo_reg) {
 	case DVOA:
@@ -298,7 +297,9 @@
 		break;
 	}
 
-	intel_dvo->dev.dev_ops->mode_set(&intel_dvo->dev, mode, adjusted_mode);
+	intel_dvo->dev.dev_ops->mode_set(&intel_dvo->dev,
+					 &crtc->config.requested_mode,
+					 adjusted_mode);
 
 	/* Save the data order, since I don't know what it should be set to. */
 	dvo_val = I915_READ(dvo_reg) &
@@ -314,8 +315,6 @@
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
 		dvo_val |= DVO_VSYNC_ACTIVE_HIGH;
 
-	I915_WRITE(dpll_reg, I915_READ(dpll_reg) | DPLL_DVO_HIGH_SPEED);
-
 	/*I915_WRITE(DVOB_SRCDIM,
 	  (adjusted_mode->hdisplay << DVO_SRCDIM_HORIZONTAL_SHIFT) |
 	  (adjusted_mode->VDisplay << DVO_SRCDIM_VERTICAL_SHIFT));*/
@@ -335,6 +334,8 @@
 intel_dvo_detect(struct drm_connector *connector, bool force)
 {
 	struct intel_dvo *intel_dvo = intel_attached_dvo(connector);
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
+		      connector->base.id, drm_get_connector_name(connector));
 	return intel_dvo->dev.dev_ops->detect(&intel_dvo->dev);
 }
 
@@ -372,11 +373,6 @@
 	kfree(connector);
 }
 
-static const struct drm_encoder_helper_funcs intel_dvo_helper_funcs = {
-	.mode_fixup = intel_dvo_mode_fixup,
-	.mode_set = intel_dvo_mode_set,
-};
-
 static const struct drm_connector_funcs intel_dvo_connector_funcs = {
 	.dpms = intel_dvo_dpms,
 	.detect = intel_dvo_detect,
@@ -392,7 +388,7 @@
 
 static void intel_dvo_enc_destroy(struct drm_encoder *encoder)
 {
-	struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder);
+	struct intel_dvo *intel_dvo = enc_to_dvo(to_intel_encoder(encoder));
 
 	if (intel_dvo->dev.dev_ops->destroy)
 		intel_dvo->dev.dev_ops->destroy(&intel_dvo->dev);
@@ -471,6 +467,8 @@
 	intel_encoder->enable = intel_enable_dvo;
 	intel_encoder->get_hw_state = intel_dvo_get_hw_state;
 	intel_encoder->get_config = intel_dvo_get_config;
+	intel_encoder->compute_config = intel_dvo_compute_config;
+	intel_encoder->mode_set = intel_dvo_mode_set;
 	intel_connector->get_hw_state = intel_dvo_connector_get_hw_state;
 
 	/* Now, try to find a controller */
@@ -537,9 +535,6 @@
 		connector->interlace_allowed = false;
 		connector->doublescan_allowed = false;
 
-		drm_encoder_helper_add(&intel_encoder->base,
-				       &intel_dvo_helper_funcs);
-
 		intel_connector_attach_encoder(intel_connector, intel_encoder);
 		if (dvo->type == INTEL_DVO_CHIP_LVDS) {
 			/* For our LVDS chipsets, we should hopefully be able
diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index dff669e..bc21000 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -139,11 +139,11 @@
 	info->apertures->ranges[0].base = dev->mode_config.fb_base;
 	info->apertures->ranges[0].size = dev_priv->gtt.mappable_end;
 
-	info->fix.smem_start = dev->mode_config.fb_base + obj->gtt_offset;
+	info->fix.smem_start = dev->mode_config.fb_base + i915_gem_obj_ggtt_offset(obj);
 	info->fix.smem_len = size;
 
 	info->screen_base =
-		ioremap_wc(dev_priv->gtt.mappable_base + obj->gtt_offset,
+		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
 			   size);
 	if (!info->screen_base) {
 		ret = -ENOSPC;
@@ -166,9 +166,9 @@
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
 
-	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x, bo %p\n",
+	DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08lx, bo %p\n",
 		      fb->width, fb->height,
-		      obj->gtt_offset, obj);
+		      i915_gem_obj_ggtt_offset(obj), obj);
 
 
 	mutex_unlock(&dev->struct_mutex);
@@ -193,26 +193,21 @@
 static void intel_fbdev_destroy(struct drm_device *dev,
 				struct intel_fbdev *ifbdev)
 {
-	struct fb_info *info;
-	struct intel_framebuffer *ifb = &ifbdev->ifb;
-
 	if (ifbdev->helper.fbdev) {
-		info = ifbdev->helper.fbdev;
+		struct fb_info *info = ifbdev->helper.fbdev;
+
 		unregister_framebuffer(info);
 		iounmap(info->screen_base);
 		if (info->cmap.len)
 			fb_dealloc_cmap(&info->cmap);
+
 		framebuffer_release(info);
 	}
 
 	drm_fb_helper_fini(&ifbdev->helper);
 
-	drm_framebuffer_unregister_private(&ifb->base);
-	drm_framebuffer_cleanup(&ifb->base);
-	if (ifb->obj) {
-		drm_gem_object_unreference_unlocked(&ifb->obj->base);
-		ifb->obj = NULL;
-	}
+	drm_framebuffer_unregister_private(&ifbdev->ifb.base);
+	intel_framebuffer_fini(&ifbdev->ifb);
 }
 
 int intel_fbdev_init(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 98df2a0..4148cc8 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -29,6 +29,7 @@
 #include <linux/i2c.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/hdmi.h>
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
@@ -66,89 +67,83 @@
 	return enc_to_intel_hdmi(&intel_attached_encoder(connector)->base);
 }
 
-void intel_dip_infoframe_csum(struct dip_infoframe *frame)
+static u32 g4x_infoframe_index(enum hdmi_infoframe_type type)
 {
-	uint8_t *data = (uint8_t *)frame;
-	uint8_t sum = 0;
-	unsigned i;
-
-	frame->checksum = 0;
-	frame->ecc = 0;
-
-	for (i = 0; i < frame->len + DIP_HEADER_SIZE; i++)
-		sum += data[i];
-
-	frame->checksum = 0x100 - sum;
-}
-
-static u32 g4x_infoframe_index(struct dip_infoframe *frame)
-{
-	switch (frame->type) {
-	case DIP_TYPE_AVI:
+	switch (type) {
+	case HDMI_INFOFRAME_TYPE_AVI:
 		return VIDEO_DIP_SELECT_AVI;
-	case DIP_TYPE_SPD:
+	case HDMI_INFOFRAME_TYPE_SPD:
 		return VIDEO_DIP_SELECT_SPD;
+	case HDMI_INFOFRAME_TYPE_VENDOR:
+		return VIDEO_DIP_SELECT_VENDOR;
 	default:
-		DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type);
+		DRM_DEBUG_DRIVER("unknown info frame type %d\n", type);
 		return 0;
 	}
 }
 
-static u32 g4x_infoframe_enable(struct dip_infoframe *frame)
+static u32 g4x_infoframe_enable(enum hdmi_infoframe_type type)
 {
-	switch (frame->type) {
-	case DIP_TYPE_AVI:
+	switch (type) {
+	case HDMI_INFOFRAME_TYPE_AVI:
 		return VIDEO_DIP_ENABLE_AVI;
-	case DIP_TYPE_SPD:
+	case HDMI_INFOFRAME_TYPE_SPD:
 		return VIDEO_DIP_ENABLE_SPD;
+	case HDMI_INFOFRAME_TYPE_VENDOR:
+		return VIDEO_DIP_ENABLE_VENDOR;
 	default:
-		DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type);
+		DRM_DEBUG_DRIVER("unknown info frame type %d\n", type);
 		return 0;
 	}
 }
 
-static u32 hsw_infoframe_enable(struct dip_infoframe *frame)
+static u32 hsw_infoframe_enable(enum hdmi_infoframe_type type)
 {
-	switch (frame->type) {
-	case DIP_TYPE_AVI:
+	switch (type) {
+	case HDMI_INFOFRAME_TYPE_AVI:
 		return VIDEO_DIP_ENABLE_AVI_HSW;
-	case DIP_TYPE_SPD:
+	case HDMI_INFOFRAME_TYPE_SPD:
 		return VIDEO_DIP_ENABLE_SPD_HSW;
+	case HDMI_INFOFRAME_TYPE_VENDOR:
+		return VIDEO_DIP_ENABLE_VS_HSW;
 	default:
-		DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type);
+		DRM_DEBUG_DRIVER("unknown info frame type %d\n", type);
 		return 0;
 	}
 }
 
-static u32 hsw_infoframe_data_reg(struct dip_infoframe *frame,
+static u32 hsw_infoframe_data_reg(enum hdmi_infoframe_type type,
 				  enum transcoder cpu_transcoder)
 {
-	switch (frame->type) {
-	case DIP_TYPE_AVI:
+	switch (type) {
+	case HDMI_INFOFRAME_TYPE_AVI:
 		return HSW_TVIDEO_DIP_AVI_DATA(cpu_transcoder);
-	case DIP_TYPE_SPD:
+	case HDMI_INFOFRAME_TYPE_SPD:
 		return HSW_TVIDEO_DIP_SPD_DATA(cpu_transcoder);
+	case HDMI_INFOFRAME_TYPE_VENDOR:
+		return HSW_TVIDEO_DIP_VS_DATA(cpu_transcoder);
 	default:
-		DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type);
+		DRM_DEBUG_DRIVER("unknown info frame type %d\n", type);
 		return 0;
 	}
 }
 
 static void g4x_write_infoframe(struct drm_encoder *encoder,
-				struct dip_infoframe *frame)
+				enum hdmi_infoframe_type type,
+				const uint8_t *frame, ssize_t len)
 {
 	uint32_t *data = (uint32_t *)frame;
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 val = I915_READ(VIDEO_DIP_CTL);
-	unsigned i, len = DIP_HEADER_SIZE + frame->len;
+	int i;
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
 	val &= ~(VIDEO_DIP_SELECT_MASK | 0xf); /* clear DIP data offset */
-	val |= g4x_infoframe_index(frame);
+	val |= g4x_infoframe_index(type);
 
-	val &= ~g4x_infoframe_enable(frame);
+	val &= ~g4x_infoframe_enable(type);
 
 	I915_WRITE(VIDEO_DIP_CTL, val);
 
@@ -162,7 +157,7 @@
 		I915_WRITE(VIDEO_DIP_DATA, 0);
 	mmiowb();
 
-	val |= g4x_infoframe_enable(frame);
+	val |= g4x_infoframe_enable(type);
 	val &= ~VIDEO_DIP_FREQ_MASK;
 	val |= VIDEO_DIP_FREQ_VSYNC;
 
@@ -171,22 +166,22 @@
 }
 
 static void ibx_write_infoframe(struct drm_encoder *encoder,
-				struct dip_infoframe *frame)
+				enum hdmi_infoframe_type type,
+				const uint8_t *frame, ssize_t len)
 {
 	uint32_t *data = (uint32_t *)frame;
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
-	unsigned i, len = DIP_HEADER_SIZE + frame->len;
+	int i, reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
 	val &= ~(VIDEO_DIP_SELECT_MASK | 0xf); /* clear DIP data offset */
-	val |= g4x_infoframe_index(frame);
+	val |= g4x_infoframe_index(type);
 
-	val &= ~g4x_infoframe_enable(frame);
+	val &= ~g4x_infoframe_enable(type);
 
 	I915_WRITE(reg, val);
 
@@ -200,7 +195,7 @@
 		I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), 0);
 	mmiowb();
 
-	val |= g4x_infoframe_enable(frame);
+	val |= g4x_infoframe_enable(type);
 	val &= ~VIDEO_DIP_FREQ_MASK;
 	val |= VIDEO_DIP_FREQ_VSYNC;
 
@@ -209,25 +204,25 @@
 }
 
 static void cpt_write_infoframe(struct drm_encoder *encoder,
-				struct dip_infoframe *frame)
+				enum hdmi_infoframe_type type,
+				const uint8_t *frame, ssize_t len)
 {
 	uint32_t *data = (uint32_t *)frame;
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
-	unsigned i, len = DIP_HEADER_SIZE + frame->len;
+	int i, reg = TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
 	val &= ~(VIDEO_DIP_SELECT_MASK | 0xf); /* clear DIP data offset */
-	val |= g4x_infoframe_index(frame);
+	val |= g4x_infoframe_index(type);
 
 	/* The DIP control register spec says that we need to update the AVI
 	 * infoframe without clearing its enable bit */
-	if (frame->type != DIP_TYPE_AVI)
-		val &= ~g4x_infoframe_enable(frame);
+	if (type != HDMI_INFOFRAME_TYPE_AVI)
+		val &= ~g4x_infoframe_enable(type);
 
 	I915_WRITE(reg, val);
 
@@ -241,7 +236,7 @@
 		I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), 0);
 	mmiowb();
 
-	val |= g4x_infoframe_enable(frame);
+	val |= g4x_infoframe_enable(type);
 	val &= ~VIDEO_DIP_FREQ_MASK;
 	val |= VIDEO_DIP_FREQ_VSYNC;
 
@@ -250,22 +245,22 @@
 }
 
 static void vlv_write_infoframe(struct drm_encoder *encoder,
-				     struct dip_infoframe *frame)
+				enum hdmi_infoframe_type type,
+				const uint8_t *frame, ssize_t len)
 {
 	uint32_t *data = (uint32_t *)frame;
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	int reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
-	unsigned i, len = DIP_HEADER_SIZE + frame->len;
+	int i, reg = VLV_TVIDEO_DIP_CTL(intel_crtc->pipe);
 	u32 val = I915_READ(reg);
 
 	WARN(!(val & VIDEO_DIP_ENABLE), "Writing DIP with CTL reg disabled\n");
 
 	val &= ~(VIDEO_DIP_SELECT_MASK | 0xf); /* clear DIP data offset */
-	val |= g4x_infoframe_index(frame);
+	val |= g4x_infoframe_index(type);
 
-	val &= ~g4x_infoframe_enable(frame);
+	val &= ~g4x_infoframe_enable(type);
 
 	I915_WRITE(reg, val);
 
@@ -279,7 +274,7 @@
 		I915_WRITE(VLV_TVIDEO_DIP_DATA(intel_crtc->pipe), 0);
 	mmiowb();
 
-	val |= g4x_infoframe_enable(frame);
+	val |= g4x_infoframe_enable(type);
 	val &= ~VIDEO_DIP_FREQ_MASK;
 	val |= VIDEO_DIP_FREQ_VSYNC;
 
@@ -288,21 +283,24 @@
 }
 
 static void hsw_write_infoframe(struct drm_encoder *encoder,
-				struct dip_infoframe *frame)
+				enum hdmi_infoframe_type type,
+				const uint8_t *frame, ssize_t len)
 {
 	uint32_t *data = (uint32_t *)frame;
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
 	u32 ctl_reg = HSW_TVIDEO_DIP_CTL(intel_crtc->config.cpu_transcoder);
-	u32 data_reg = hsw_infoframe_data_reg(frame, intel_crtc->config.cpu_transcoder);
-	unsigned int i, len = DIP_HEADER_SIZE + frame->len;
+	u32 data_reg;
+	int i;
 	u32 val = I915_READ(ctl_reg);
 
+	data_reg = hsw_infoframe_data_reg(type,
+					  intel_crtc->config.cpu_transcoder);
 	if (data_reg == 0)
 		return;
 
-	val &= ~hsw_infoframe_enable(frame);
+	val &= ~hsw_infoframe_enable(type);
 	I915_WRITE(ctl_reg, val);
 
 	mmiowb();
@@ -315,18 +313,48 @@
 		I915_WRITE(data_reg + i, 0);
 	mmiowb();
 
-	val |= hsw_infoframe_enable(frame);
+	val |= hsw_infoframe_enable(type);
 	I915_WRITE(ctl_reg, val);
 	POSTING_READ(ctl_reg);
 }
 
-static void intel_set_infoframe(struct drm_encoder *encoder,
-				struct dip_infoframe *frame)
+/*
+ * The data we write to the DIP data buffer registers is 1 byte bigger than the
+ * HDMI infoframe size because of an ECC/reserved byte at position 3 (starting
+ * at 0). It's also a byte used by DisplayPort so the same DIP registers can be
+ * used for both technologies.
+ *
+ * DW0: Reserved/ECC/DP | HB2 | HB1 | HB0
+ * DW1:       DB3       | DB2 | DB1 | DB0
+ * DW2:       DB7       | DB6 | DB5 | DB4
+ * DW3: ...
+ *
+ * (HB is Header Byte, DB is Data Byte)
+ *
+ * The hdmi pack() functions don't know about that hardware specific hole so we
+ * trick them by giving an offset into the buffer and moving back the header
+ * bytes by one.
+ */
+static void intel_write_infoframe(struct drm_encoder *encoder,
+				  union hdmi_infoframe *frame)
 {
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+	uint8_t buffer[VIDEO_DIP_DATA_SIZE];
+	ssize_t len;
 
-	intel_dip_infoframe_csum(frame);
-	intel_hdmi->write_infoframe(encoder, frame);
+	/* see comment above for the reason for this offset */
+	len = hdmi_infoframe_pack(frame, buffer + 1, sizeof(buffer) - 1);
+	if (len < 0)
+		return;
+
+	/* Insert the 'hole' (see big comment above) at position 3 */
+	buffer[0] = buffer[1];
+	buffer[1] = buffer[2];
+	buffer[2] = buffer[3];
+	buffer[3] = 0;
+	len++;
+
+	intel_hdmi->write_infoframe(encoder, frame->any.type, buffer, len);
 }
 
 static void intel_hdmi_set_avi_infoframe(struct drm_encoder *encoder,
@@ -334,40 +362,57 @@
 {
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	struct dip_infoframe avi_if = {
-		.type = DIP_TYPE_AVI,
-		.ver = DIP_VERSION_AVI,
-		.len = DIP_LEN_AVI,
-	};
+	union hdmi_infoframe frame;
+	int ret;
 
-	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
-		avi_if.body.avi.YQ_CN_PR |= DIP_AVI_PR_2;
+	ret = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi,
+						       adjusted_mode);
+	if (ret < 0) {
+		DRM_ERROR("couldn't fill AVI infoframe\n");
+		return;
+	}
 
 	if (intel_hdmi->rgb_quant_range_selectable) {
 		if (intel_crtc->config.limited_color_range)
-			avi_if.body.avi.ITC_EC_Q_SC |= DIP_AVI_RGB_QUANT_RANGE_LIMITED;
+			frame.avi.quantization_range =
+				HDMI_QUANTIZATION_RANGE_LIMITED;
 		else
-			avi_if.body.avi.ITC_EC_Q_SC |= DIP_AVI_RGB_QUANT_RANGE_FULL;
+			frame.avi.quantization_range =
+				HDMI_QUANTIZATION_RANGE_FULL;
 	}
 
-	avi_if.body.avi.VIC = drm_match_cea_mode(adjusted_mode);
-
-	intel_set_infoframe(encoder, &avi_if);
+	intel_write_infoframe(encoder, &frame);
 }
 
 static void intel_hdmi_set_spd_infoframe(struct drm_encoder *encoder)
 {
-	struct dip_infoframe spd_if;
+	union hdmi_infoframe frame;
+	int ret;
 
-	memset(&spd_if, 0, sizeof(spd_if));
-	spd_if.type = DIP_TYPE_SPD;
-	spd_if.ver = DIP_VERSION_SPD;
-	spd_if.len = DIP_LEN_SPD;
-	strcpy(spd_if.body.spd.vn, "Intel");
-	strcpy(spd_if.body.spd.pd, "Integrated gfx");
-	spd_if.body.spd.sdi = DIP_SPD_PC;
+	ret = hdmi_spd_infoframe_init(&frame.spd, "Intel", "Integrated gfx");
+	if (ret < 0) {
+		DRM_ERROR("couldn't fill SPD infoframe\n");
+		return;
+	}
 
-	intel_set_infoframe(encoder, &spd_if);
+	frame.spd.sdi = HDMI_SPD_SDI_PC;
+
+	intel_write_infoframe(encoder, &frame);
+}
+
+static void
+intel_hdmi_set_hdmi_infoframe(struct drm_encoder *encoder,
+			      struct drm_display_mode *adjusted_mode)
+{
+	union hdmi_infoframe frame;
+	int ret;
+
+	ret = drm_hdmi_vendor_infoframe_from_display_mode(&frame.vendor.hdmi,
+							  adjusted_mode);
+	if (ret < 0)
+		return;
+
+	intel_write_infoframe(encoder, &frame);
 }
 
 static void g4x_set_infoframes(struct drm_encoder *encoder,
@@ -432,6 +477,7 @@
 
 	intel_hdmi_set_avi_infoframe(encoder, adjusted_mode);
 	intel_hdmi_set_spd_infoframe(encoder);
+	intel_hdmi_set_hdmi_infoframe(encoder, adjusted_mode);
 }
 
 static void ibx_set_infoframes(struct drm_encoder *encoder,
@@ -493,6 +539,7 @@
 
 	intel_hdmi_set_avi_infoframe(encoder, adjusted_mode);
 	intel_hdmi_set_spd_infoframe(encoder);
+	intel_hdmi_set_hdmi_infoframe(encoder, adjusted_mode);
 }
 
 static void cpt_set_infoframes(struct drm_encoder *encoder,
@@ -528,6 +575,7 @@
 
 	intel_hdmi_set_avi_infoframe(encoder, adjusted_mode);
 	intel_hdmi_set_spd_infoframe(encoder);
+	intel_hdmi_set_hdmi_infoframe(encoder, adjusted_mode);
 }
 
 static void vlv_set_infoframes(struct drm_encoder *encoder,
@@ -562,6 +610,7 @@
 
 	intel_hdmi_set_avi_infoframe(encoder, adjusted_mode);
 	intel_hdmi_set_spd_infoframe(encoder);
+	intel_hdmi_set_hdmi_infoframe(encoder, adjusted_mode);
 }
 
 static void hsw_set_infoframes(struct drm_encoder *encoder,
@@ -589,16 +638,16 @@
 
 	intel_hdmi_set_avi_infoframe(encoder, adjusted_mode);
 	intel_hdmi_set_spd_infoframe(encoder);
+	intel_hdmi_set_hdmi_infoframe(encoder, adjusted_mode);
 }
 
-static void intel_hdmi_mode_set(struct drm_encoder *encoder,
-				struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode)
+static void intel_hdmi_mode_set(struct intel_encoder *encoder)
 {
-	struct drm_device *dev = encoder->dev;
+	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->crtc);
-	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder);
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct drm_display_mode *adjusted_mode = &crtc->config.adjusted_mode;
 	u32 hdmi_val;
 
 	hdmi_val = SDVO_ENCODING_HDMI;
@@ -609,7 +658,7 @@
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PHSYNC)
 		hdmi_val |= SDVO_HSYNC_ACTIVE_HIGH;
 
-	if (intel_crtc->config.pipe_bpp > 24)
+	if (crtc->config.pipe_bpp > 24)
 		hdmi_val |= HDMI_COLOR_FORMAT_12bpc;
 	else
 		hdmi_val |= SDVO_COLOR_FORMAT_8bpc;
@@ -620,21 +669,21 @@
 
 	if (intel_hdmi->has_audio) {
 		DRM_DEBUG_DRIVER("Enabling HDMI audio on pipe %c\n",
-				 pipe_name(intel_crtc->pipe));
+				 pipe_name(crtc->pipe));
 		hdmi_val |= SDVO_AUDIO_ENABLE;
 		hdmi_val |= HDMI_MODE_SELECT_HDMI;
-		intel_write_eld(encoder, adjusted_mode);
+		intel_write_eld(&encoder->base, adjusted_mode);
 	}
 
 	if (HAS_PCH_CPT(dev))
-		hdmi_val |= SDVO_PIPE_SEL_CPT(intel_crtc->pipe);
+		hdmi_val |= SDVO_PIPE_SEL_CPT(crtc->pipe);
 	else
-		hdmi_val |= SDVO_PIPE_SEL(intel_crtc->pipe);
+		hdmi_val |= SDVO_PIPE_SEL(crtc->pipe);
 
 	I915_WRITE(intel_hdmi->hdmi_reg, hdmi_val);
 	POSTING_READ(intel_hdmi->hdmi_reg);
 
-	intel_hdmi->set_infoframes(encoder, adjusted_mode);
+	intel_hdmi->set_infoframes(&encoder->base, adjusted_mode);
 }
 
 static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
@@ -719,14 +768,10 @@
 		I915_WRITE(intel_hdmi->hdmi_reg, temp);
 		POSTING_READ(intel_hdmi->hdmi_reg);
 	}
+}
 
-	if (IS_VALLEYVIEW(dev)) {
-		struct intel_digital_port *dport =
-			enc_to_dig_port(&encoder->base);
-		int channel = vlv_dport_to_channel(dport);
-
-		vlv_wait_port_ready(dev_priv, channel);
-	}
+static void vlv_enable_hdmi(struct intel_encoder *encoder)
+{
 }
 
 static void intel_disable_hdmi(struct intel_encoder *encoder)
@@ -785,10 +830,22 @@
 	}
 }
 
+static int hdmi_portclock_limit(struct intel_hdmi *hdmi)
+{
+	struct drm_device *dev = intel_hdmi_to_dev(hdmi);
+
+	if (IS_G4X(dev))
+		return 165000;
+	else if (IS_HASWELL(dev))
+		return 300000;
+	else
+		return 225000;
+}
+
 static int intel_hdmi_mode_valid(struct drm_connector *connector,
 				 struct drm_display_mode *mode)
 {
-	if (mode->clock > 165000)
+	if (mode->clock > hdmi_portclock_limit(intel_attached_hdmi(connector)))
 		return MODE_CLOCK_HIGH;
 	if (mode->clock < 20000)
 		return MODE_CLOCK_LOW;
@@ -806,6 +863,7 @@
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_display_mode *adjusted_mode = &pipe_config->adjusted_mode;
 	int clock_12bpc = pipe_config->requested_mode.clock * 3 / 2;
+	int portclock_limit = hdmi_portclock_limit(intel_hdmi);
 	int desired_bpp;
 
 	if (intel_hdmi->color_range_auto) {
@@ -829,7 +887,7 @@
 	 * outputs. We also need to check that the higher clock still fits
 	 * within limits.
 	 */
-	if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= 225000
+	if (pipe_config->pipe_bpp > 8*3 && clock_12bpc <= portclock_limit
 	    && HAS_PCH_SPLIT(dev)) {
 		DRM_DEBUG_KMS("picking bpc to 12 for HDMI output\n");
 		desired_bpp = 12*3;
@@ -846,7 +904,7 @@
 		pipe_config->pipe_bpp = desired_bpp;
 	}
 
-	if (adjusted_mode->clock > 225000) {
+	if (adjusted_mode->clock > portclock_limit) {
 		DRM_DEBUG_KMS("too high HDMI clock, rejecting mode\n");
 		return false;
 	}
@@ -866,6 +924,9 @@
 	struct edid *edid;
 	enum drm_connector_status status = connector_status_disconnected;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
+		      connector->base.id, drm_get_connector_name(connector));
+
 	intel_hdmi->has_hdmi_sink = false;
 	intel_hdmi->has_audio = false;
 	intel_hdmi->rgb_quant_range_selectable = false;
@@ -1017,6 +1078,7 @@
 		return;
 
 	/* Enable clock channels for this port */
+	mutex_lock(&dev_priv->dpio_lock);
 	val = vlv_dpio_read(dev_priv, DPIO_DATA_LANE_A(port));
 	val = 0;
 	if (pipe)
@@ -1047,6 +1109,11 @@
 			 0x00760018);
 	vlv_dpio_write(dev_priv, DPIO_PCS_CLOCKBUF8(port),
 			 0x00400888);
+	mutex_unlock(&dev_priv->dpio_lock);
+
+	intel_enable_hdmi(encoder);
+
+	vlv_wait_port_ready(dev_priv, port);
 }
 
 static void intel_hdmi_pre_pll_enable(struct intel_encoder *encoder)
@@ -1060,6 +1127,7 @@
 		return;
 
 	/* Program Tx lane resets to default */
+	mutex_lock(&dev_priv->dpio_lock);
 	vlv_dpio_write(dev_priv, DPIO_PCS_TX(port),
 			 DPIO_PCS_TX_LANE2_RESET |
 			 DPIO_PCS_TX_LANE1_RESET);
@@ -1078,6 +1146,7 @@
 			 0x00002000);
 	vlv_dpio_write(dev_priv, DPIO_TX_OCALINIT(port),
 			 DPIO_TX_OCALINIT_EN);
+	mutex_unlock(&dev_priv->dpio_lock);
 }
 
 static void intel_hdmi_post_disable(struct intel_encoder *encoder)
@@ -1100,10 +1169,6 @@
 	kfree(connector);
 }
 
-static const struct drm_encoder_helper_funcs intel_hdmi_helper_funcs = {
-	.mode_set = intel_hdmi_mode_set,
-};
-
 static const struct drm_connector_funcs intel_hdmi_connector_funcs = {
 	.dpms = intel_connector_dpms,
 	.detect = intel_hdmi_detect,
@@ -1208,7 +1273,6 @@
 {
 	struct intel_digital_port *intel_dig_port;
 	struct intel_encoder *intel_encoder;
-	struct drm_encoder *encoder;
 	struct intel_connector *intel_connector;
 
 	intel_dig_port = kzalloc(sizeof(struct intel_digital_port), GFP_KERNEL);
@@ -1222,21 +1286,22 @@
 	}
 
 	intel_encoder = &intel_dig_port->base;
-	encoder = &intel_encoder->base;
 
 	drm_encoder_init(dev, &intel_encoder->base, &intel_hdmi_enc_funcs,
 			 DRM_MODE_ENCODER_TMDS);
-	drm_encoder_helper_add(&intel_encoder->base, &intel_hdmi_helper_funcs);
 
 	intel_encoder->compute_config = intel_hdmi_compute_config;
-	intel_encoder->enable = intel_enable_hdmi;
+	intel_encoder->mode_set = intel_hdmi_mode_set;
 	intel_encoder->disable = intel_disable_hdmi;
 	intel_encoder->get_hw_state = intel_hdmi_get_hw_state;
 	intel_encoder->get_config = intel_hdmi_get_config;
 	if (IS_VALLEYVIEW(dev)) {
-		intel_encoder->pre_enable = intel_hdmi_pre_enable;
 		intel_encoder->pre_pll_enable = intel_hdmi_pre_pll_enable;
+		intel_encoder->pre_enable = intel_hdmi_pre_enable;
+		intel_encoder->enable = vlv_enable_hdmi;
 		intel_encoder->post_disable = intel_hdmi_post_disable;
+	} else {
+		intel_encoder->enable = intel_enable_hdmi;
 	}
 
 	intel_encoder->type = INTEL_OUTPUT_HDMI;
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index 639fe19..d1c1e0f7 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -398,6 +398,7 @@
 	int i, reg_offset;
 	int ret = 0;
 
+	intel_aux_display_runtime_get(dev_priv);
 	mutex_lock(&dev_priv->gmbus_mutex);
 
 	if (bus->force_bit) {
@@ -497,6 +498,7 @@
 
 out:
 	mutex_unlock(&dev_priv->gmbus_mutex);
+	intel_aux_display_runtime_put(dev_priv);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 021e8da..4d33278 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -109,23 +109,38 @@
 		flags |= DRM_MODE_FLAG_PVSYNC;
 
 	pipe_config->adjusted_mode.flags |= flags;
+
+	/* gen2/3 store dither state in pfit control, needs to match */
+	if (INTEL_INFO(dev)->gen < 4) {
+		tmp = I915_READ(PFIT_CONTROL);
+
+		pipe_config->gmch_pfit.control |= tmp & PANEL_8TO6_DITHER_ENABLE;
+	}
 }
 
 /* The LVDS pin pair needs to be on before the DPLLs are enabled.
  * This is an exception to the general rule that mode_set doesn't turn
  * things on.
  */
-static void intel_pre_pll_enable_lvds(struct intel_encoder *encoder)
+static void intel_pre_enable_lvds(struct intel_encoder *encoder)
 {
 	struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base);
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
+	struct intel_crtc *crtc = to_intel_crtc(encoder->base.crtc);
 	struct drm_display_mode *fixed_mode =
 		lvds_encoder->attached_connector->base.panel.fixed_mode;
-	int pipe = intel_crtc->pipe;
+	int pipe = crtc->pipe;
 	u32 temp;
 
+	if (HAS_PCH_SPLIT(dev)) {
+		assert_fdi_rx_pll_disabled(dev_priv, pipe);
+		assert_shared_dpll_disabled(dev_priv,
+					    intel_crtc_to_shared_dpll(crtc));
+	} else {
+		assert_pll_disabled(dev_priv, pipe);
+	}
+
 	temp = I915_READ(lvds_encoder->reg);
 	temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
 
@@ -142,7 +157,7 @@
 
 	/* set the corresponsding LVDS_BORDER bit */
 	temp &= ~LVDS_BORDER_ENABLE;
-	temp |= intel_crtc->config.gmch_pfit.lvds_border_bits;
+	temp |= crtc->config.gmch_pfit.lvds_border_bits;
 	/* Set the B0-B3 data pairs corresponding to whether we're going to
 	 * set the DPLLs for dual-channel mode or not.
 	 */
@@ -162,8 +177,7 @@
 	if (INTEL_INFO(dev)->gen == 4) {
 		/* Bspec wording suggests that LVDS port dithering only exists
 		 * for 18bpp panels. */
-		if (intel_crtc->config.dither &&
-		    intel_crtc->config.pipe_bpp == 18)
+		if (crtc->config.dither && crtc->config.pipe_bpp == 18)
 			temp |= LVDS_ENABLE_DITHER;
 		else
 			temp &= ~LVDS_ENABLE_DITHER;
@@ -290,14 +304,11 @@
 
 		intel_pch_panel_fitting(intel_crtc, pipe_config,
 					intel_connector->panel.fitting_mode);
-		return true;
 	} else {
 		intel_gmch_panel_fitting(intel_crtc, pipe_config,
 					 intel_connector->panel.fitting_mode);
-	}
 
-	drm_mode_set_crtcinfo(adjusted_mode, 0);
-	pipe_config->timings_set = true;
+	}
 
 	/*
 	 * XXX: It would be nice to support lower refresh rates on the
@@ -308,14 +319,12 @@
 	return true;
 }
 
-static void intel_lvds_mode_set(struct drm_encoder *encoder,
-				struct drm_display_mode *mode,
-				struct drm_display_mode *adjusted_mode)
+static void intel_lvds_mode_set(struct intel_encoder *encoder)
 {
 	/*
-	 * The LVDS pin pair will already have been turned on in the
-	 * intel_crtc_mode_set since it has a large impact on the DPLL
-	 * settings.
+	 * We don't do anything here, the LVDS port is fully set up in the pre
+	 * enable hook - the ordering constraints for enabling the lvds port vs.
+	 * enabling the display pll are too strict.
 	 */
 }
 
@@ -332,6 +341,9 @@
 	struct drm_device *dev = connector->dev;
 	enum drm_connector_status status;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
+		      connector->base.id, drm_get_connector_name(connector));
+
 	status = intel_panel_detect(dev);
 	if (status != connector_status_unknown)
 		return status;
@@ -493,10 +505,6 @@
 	return 0;
 }
 
-static const struct drm_encoder_helper_funcs intel_lvds_helper_funcs = {
-	.mode_set = intel_lvds_mode_set,
-};
-
 static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs = {
 	.get_modes = intel_lvds_get_modes,
 	.mode_valid = intel_lvds_mode_valid,
@@ -955,8 +963,9 @@
 			 DRM_MODE_ENCODER_LVDS);
 
 	intel_encoder->enable = intel_enable_lvds;
-	intel_encoder->pre_pll_enable = intel_pre_pll_enable_lvds;
+	intel_encoder->pre_enable = intel_pre_enable_lvds;
 	intel_encoder->compute_config = intel_lvds_compute_config;
+	intel_encoder->mode_set = intel_lvds_mode_set;
 	intel_encoder->disable = intel_disable_lvds;
 	intel_encoder->get_hw_state = intel_lvds_get_hw_state;
 	intel_encoder->get_config = intel_lvds_get_config;
@@ -973,7 +982,6 @@
 	else
 		intel_encoder->crtc_mask = (1 << 1);
 
-	drm_encoder_helper_add(encoder, &intel_lvds_helper_funcs);
 	drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs);
 	connector->display_info.subpixel_order = SubPixelHorizontalRGB;
 	connector->interlace_allowed = false;
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a369881..ddfd0ae 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -196,7 +196,7 @@
 		regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_obj->handle->vaddr;
 	else
 		regs = io_mapping_map_wc(dev_priv->gtt.mappable,
-					 overlay->reg_bo->gtt_offset);
+					 i915_gem_obj_ggtt_offset(overlay->reg_bo));
 
 	return regs;
 }
@@ -740,7 +740,7 @@
 	swidth = params->src_w;
 	swidthsw = calc_swidthsw(overlay->dev, params->offset_Y, tmp_width);
 	sheight = params->src_h;
-	iowrite32(new_bo->gtt_offset + params->offset_Y, &regs->OBUF_0Y);
+	iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, &regs->OBUF_0Y);
 	ostride = params->stride_Y;
 
 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
@@ -754,8 +754,8 @@
 				      params->src_w/uv_hscale);
 		swidthsw |= max_t(u32, tmp_U, tmp_V) << 16;
 		sheight |= (params->src_h/uv_vscale) << 16;
-		iowrite32(new_bo->gtt_offset + params->offset_U, &regs->OBUF_0U);
-		iowrite32(new_bo->gtt_offset + params->offset_V, &regs->OBUF_0V);
+		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, &regs->OBUF_0U);
+		iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, &regs->OBUF_0V);
 		ostride |= params->stride_UV << 16;
 	}
 
@@ -1333,7 +1333,9 @@
 
 	overlay->dev = dev;
 
-	reg_bo = i915_gem_object_create_stolen(dev, PAGE_SIZE);
+	reg_bo = NULL;
+	if (!OVERLAY_NEEDS_PHYSICAL(dev))
+		reg_bo = i915_gem_object_create_stolen(dev, PAGE_SIZE);
 	if (reg_bo == NULL)
 		reg_bo = i915_gem_alloc_object(dev, PAGE_SIZE);
 	if (reg_bo == NULL)
@@ -1350,12 +1352,12 @@
 		}
 		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
 	} else {
-		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true, false);
+		ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false);
 		if (ret) {
 			DRM_ERROR("failed to pin overlay register bo\n");
 			goto out_free_bo;
 		}
-		overlay->flip_addr = reg_bo->gtt_offset;
+		overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo);
 
 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
 		if (ret) {
@@ -1412,9 +1414,6 @@
 	kfree(dev_priv->overlay);
 }
 
-#ifdef CONFIG_DEBUG_FS
-#include <linux/seq_file.h>
-
 struct intel_overlay_error_state {
 	struct overlay_registers regs;
 	unsigned long base;
@@ -1435,7 +1434,7 @@
 			overlay->reg_bo->phys_obj->handle->vaddr;
 	else
 		regs = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
-						overlay->reg_bo->gtt_offset);
+						i915_gem_obj_ggtt_offset(overlay->reg_bo));
 
 	return regs;
 }
@@ -1468,7 +1467,7 @@
 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
 		error->base = (__force long)overlay->reg_bo->phys_obj->handle->vaddr;
 	else
-		error->base = overlay->reg_bo->gtt_offset;
+		error->base = i915_gem_obj_ggtt_offset(overlay->reg_bo);
 
 	regs = intel_overlay_map_regs_atomic(overlay);
 	if (!regs)
@@ -1537,4 +1536,3 @@
 	P(UVSCALEV);
 #undef P
 }
-#endif
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index 80bea1d..a43c33b 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -494,8 +494,11 @@
 		goto out;
 	}
 
-	/* scale to hardware */
-	level = level * freq / max;
+	/* scale to hardware, but be careful to not overflow */
+	if (freq < max)
+		level = level * freq / max;
+	else
+		level = freq / max * level;
 
 	dev_priv->backlight.level = level;
 	if (dev_priv->backlight.device)
@@ -512,6 +515,17 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long flags;
 
+	/*
+	 * Do not disable backlight on the vgaswitcheroo path. When switching
+	 * away from i915, the other client may depend on i915 to handle the
+	 * backlight. This will leave the backlight on unnecessarily when
+	 * another client is not activated.
+	 */
+	if (dev->switch_power_state == DRM_SWITCH_POWER_CHANGING) {
+		DRM_DEBUG_DRIVER("Skipping backlight disable on vga switch\n");
+		return;
+	}
+
 	spin_lock_irqsave(&dev_priv->backlight.lock, flags);
 
 	dev_priv->backlight.enabled = false;
@@ -580,7 +594,8 @@
 		POSTING_READ(reg);
 		I915_WRITE(reg, tmp | BLM_PWM_ENABLE);
 
-		if (HAS_PCH_SPLIT(dev)) {
+		if (HAS_PCH_SPLIT(dev) &&
+		    !(dev_priv->quirks & QUIRK_NO_PCH_PWM_ENABLE)) {
 			tmp = I915_READ(BLC_PWM_PCH_CTL1);
 			tmp |= BLM_PCH_PWM_ENABLE;
 			tmp &= ~BLM_PCH_OVERRIDE_ENABLE;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d10e673..4605682 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -30,8 +30,7 @@
 #include "intel_drv.h"
 #include "../../../platform/x86/intel_ips.h"
 #include <linux/module.h>
-
-#define FORCEWAKE_ACK_TIMEOUT_MS 2
+#include <drm/i915_powerwell.h>
 
 /* FBC, or Frame Buffer Compression, is a technique employed to compress the
  * framebuffer contents in-memory, aiming at reducing the required bandwidth
@@ -86,7 +85,7 @@
 	int plane, i;
 	u32 fbc_ctl, fbc_ctl2;
 
-	cfb_pitch = dev_priv->cfb_size / FBC_LL_SIZE;
+	cfb_pitch = dev_priv->fbc.size / FBC_LL_SIZE;
 	if (fb->pitches[0] < cfb_pitch)
 		cfb_pitch = fb->pitches[0];
 
@@ -217,7 +216,7 @@
 		   (stall_watermark << DPFC_RECOMP_STALL_WM_SHIFT) |
 		   (interval << DPFC_RECOMP_TIMER_COUNT_SHIFT));
 	I915_WRITE(ILK_DPFC_FENCE_YOFF, crtc->y);
-	I915_WRITE(ILK_FBC_RT_BASE, obj->gtt_offset | ILK_FBC_RT_VALID);
+	I915_WRITE(ILK_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj) | ILK_FBC_RT_VALID);
 	/* enable it... */
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
@@ -274,7 +273,7 @@
 	struct drm_i915_gem_object *obj = intel_fb->obj;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-	I915_WRITE(IVB_FBC_RT_BASE, obj->gtt_offset);
+	I915_WRITE(IVB_FBC_RT_BASE, i915_gem_obj_ggtt_offset(obj));
 
 	I915_WRITE(ILK_DPFC_CONTROL, DPFC_CTL_EN | DPFC_CTL_LIMIT_1X |
 		   IVB_DPFC_CTL_FENCE_EN |
@@ -325,7 +324,7 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	mutex_lock(&dev->struct_mutex);
-	if (work == dev_priv->fbc_work) {
+	if (work == dev_priv->fbc.fbc_work) {
 		/* Double check that we haven't switched fb without cancelling
 		 * the prior work.
 		 */
@@ -333,12 +332,12 @@
 			dev_priv->display.enable_fbc(work->crtc,
 						     work->interval);
 
-			dev_priv->cfb_plane = to_intel_crtc(work->crtc)->plane;
-			dev_priv->cfb_fb = work->crtc->fb->base.id;
-			dev_priv->cfb_y = work->crtc->y;
+			dev_priv->fbc.plane = to_intel_crtc(work->crtc)->plane;
+			dev_priv->fbc.fb_id = work->crtc->fb->base.id;
+			dev_priv->fbc.y = work->crtc->y;
 		}
 
-		dev_priv->fbc_work = NULL;
+		dev_priv->fbc.fbc_work = NULL;
 	}
 	mutex_unlock(&dev->struct_mutex);
 
@@ -347,28 +346,28 @@
 
 static void intel_cancel_fbc_work(struct drm_i915_private *dev_priv)
 {
-	if (dev_priv->fbc_work == NULL)
+	if (dev_priv->fbc.fbc_work == NULL)
 		return;
 
 	DRM_DEBUG_KMS("cancelling pending FBC enable\n");
 
 	/* Synchronisation is provided by struct_mutex and checking of
-	 * dev_priv->fbc_work, so we can perform the cancellation
+	 * dev_priv->fbc.fbc_work, so we can perform the cancellation
 	 * entirely asynchronously.
 	 */
-	if (cancel_delayed_work(&dev_priv->fbc_work->work))
+	if (cancel_delayed_work(&dev_priv->fbc.fbc_work->work))
 		/* tasklet was killed before being run, clean up */
-		kfree(dev_priv->fbc_work);
+		kfree(dev_priv->fbc.fbc_work);
 
 	/* Mark the work as no longer wanted so that if it does
 	 * wake-up (because the work was already running and waiting
 	 * for our mutex), it will discover that is no longer
 	 * necessary to run.
 	 */
-	dev_priv->fbc_work = NULL;
+	dev_priv->fbc.fbc_work = NULL;
 }
 
-void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
+static void intel_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
 {
 	struct intel_fbc_work *work;
 	struct drm_device *dev = crtc->dev;
@@ -381,6 +380,7 @@
 
 	work = kzalloc(sizeof *work, GFP_KERNEL);
 	if (work == NULL) {
+		DRM_ERROR("Failed to allocate FBC work structure\n");
 		dev_priv->display.enable_fbc(crtc, interval);
 		return;
 	}
@@ -390,9 +390,7 @@
 	work->interval = interval;
 	INIT_DELAYED_WORK(&work->work, intel_fbc_work_fn);
 
-	dev_priv->fbc_work = work;
-
-	DRM_DEBUG_KMS("scheduling delayed FBC enable\n");
+	dev_priv->fbc.fbc_work = work;
 
 	/* Delay the actual enabling to let pageflipping cease and the
 	 * display to settle before starting the compression. Note that
@@ -404,6 +402,8 @@
 	 * following the termination of the page-flipping sequence
 	 * and indeed performing the enable as a co-routine and not
 	 * waiting synchronously upon the vblank.
+	 *
+	 * WaFbcWaitForVBlankBeforeEnable:ilk,snb
 	 */
 	schedule_delayed_work(&work->work, msecs_to_jiffies(50));
 }
@@ -418,7 +418,17 @@
 		return;
 
 	dev_priv->display.disable_fbc(dev);
-	dev_priv->cfb_plane = -1;
+	dev_priv->fbc.plane = -1;
+}
+
+static bool set_no_fbc_reason(struct drm_i915_private *dev_priv,
+			      enum no_fbc_reason reason)
+{
+	if (dev_priv->fbc.no_fbc_reason == reason)
+		return false;
+
+	dev_priv->fbc.no_fbc_reason = reason;
+	return true;
 }
 
 /**
@@ -448,14 +458,18 @@
 	struct drm_framebuffer *fb;
 	struct intel_framebuffer *intel_fb;
 	struct drm_i915_gem_object *obj;
-	int enable_fbc;
 	unsigned int max_hdisplay, max_vdisplay;
 
-	if (!i915_powersave)
+	if (!I915_HAS_FBC(dev)) {
+		set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED);
 		return;
+	}
 
-	if (!I915_HAS_FBC(dev))
+	if (!i915_powersave) {
+		if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
+			DRM_DEBUG_KMS("fbc disabled per module param\n");
 		return;
+	}
 
 	/*
 	 * If FBC is already on, we just have to verify that we can
@@ -470,8 +484,8 @@
 		if (intel_crtc_active(tmp_crtc) &&
 		    !to_intel_crtc(tmp_crtc)->primary_disabled) {
 			if (crtc) {
-				DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
-				dev_priv->no_fbc_reason = FBC_MULTIPLE_PIPES;
+				if (set_no_fbc_reason(dev_priv, FBC_MULTIPLE_PIPES))
+					DRM_DEBUG_KMS("more than one pipe active, disabling compression\n");
 				goto out_disable;
 			}
 			crtc = tmp_crtc;
@@ -479,8 +493,8 @@
 	}
 
 	if (!crtc || crtc->fb == NULL) {
-		DRM_DEBUG_KMS("no output, disabling\n");
-		dev_priv->no_fbc_reason = FBC_NO_OUTPUT;
+		if (set_no_fbc_reason(dev_priv, FBC_NO_OUTPUT))
+			DRM_DEBUG_KMS("no output, disabling\n");
 		goto out_disable;
 	}
 
@@ -489,23 +503,22 @@
 	intel_fb = to_intel_framebuffer(fb);
 	obj = intel_fb->obj;
 
-	enable_fbc = i915_enable_fbc;
-	if (enable_fbc < 0) {
-		DRM_DEBUG_KMS("fbc set to per-chip default\n");
-		enable_fbc = 1;
-		if (INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev))
-			enable_fbc = 0;
+	if (i915_enable_fbc < 0 &&
+	    INTEL_INFO(dev)->gen <= 7 && !IS_HASWELL(dev)) {
+		if (set_no_fbc_reason(dev_priv, FBC_CHIP_DEFAULT))
+			DRM_DEBUG_KMS("disabled per chip default\n");
+		goto out_disable;
 	}
-	if (!enable_fbc) {
-		DRM_DEBUG_KMS("fbc disabled per module param\n");
-		dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
+	if (!i915_enable_fbc) {
+		if (set_no_fbc_reason(dev_priv, FBC_MODULE_PARAM))
+			DRM_DEBUG_KMS("fbc disabled per module param\n");
 		goto out_disable;
 	}
 	if ((crtc->mode.flags & DRM_MODE_FLAG_INTERLACE) ||
 	    (crtc->mode.flags & DRM_MODE_FLAG_DBLSCAN)) {
-		DRM_DEBUG_KMS("mode incompatible with compression, "
-			      "disabling\n");
-		dev_priv->no_fbc_reason = FBC_UNSUPPORTED_MODE;
+		if (set_no_fbc_reason(dev_priv, FBC_UNSUPPORTED_MODE))
+			DRM_DEBUG_KMS("mode incompatible with compression, "
+				      "disabling\n");
 		goto out_disable;
 	}
 
@@ -518,14 +531,14 @@
 	}
 	if ((crtc->mode.hdisplay > max_hdisplay) ||
 	    (crtc->mode.vdisplay > max_vdisplay)) {
-		DRM_DEBUG_KMS("mode too large for compression, disabling\n");
-		dev_priv->no_fbc_reason = FBC_MODE_TOO_LARGE;
+		if (set_no_fbc_reason(dev_priv, FBC_MODE_TOO_LARGE))
+			DRM_DEBUG_KMS("mode too large for compression, disabling\n");
 		goto out_disable;
 	}
 	if ((IS_I915GM(dev) || IS_I945GM(dev) || IS_HASWELL(dev)) &&
 	    intel_crtc->plane != 0) {
-		DRM_DEBUG_KMS("plane not 0, disabling compression\n");
-		dev_priv->no_fbc_reason = FBC_BAD_PLANE;
+		if (set_no_fbc_reason(dev_priv, FBC_BAD_PLANE))
+			DRM_DEBUG_KMS("plane not 0, disabling compression\n");
 		goto out_disable;
 	}
 
@@ -534,8 +547,8 @@
 	 */
 	if (obj->tiling_mode != I915_TILING_X ||
 	    obj->fence_reg == I915_FENCE_REG_NONE) {
-		DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
-		dev_priv->no_fbc_reason = FBC_NOT_TILED;
+		if (set_no_fbc_reason(dev_priv, FBC_NOT_TILED))
+			DRM_DEBUG_KMS("framebuffer not tiled or fenced, disabling compression\n");
 		goto out_disable;
 	}
 
@@ -544,8 +557,8 @@
 		goto out_disable;
 
 	if (i915_gem_stolen_setup_compression(dev, intel_fb->obj->base.size)) {
-		DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
-		dev_priv->no_fbc_reason = FBC_STOLEN_TOO_SMALL;
+		if (set_no_fbc_reason(dev_priv, FBC_STOLEN_TOO_SMALL))
+			DRM_DEBUG_KMS("framebuffer too large, disabling compression\n");
 		goto out_disable;
 	}
 
@@ -554,9 +567,9 @@
 	 * cannot be unpinned (and have its GTT offset and fence revoked)
 	 * without first being decoupled from the scanout and FBC disabled.
 	 */
-	if (dev_priv->cfb_plane == intel_crtc->plane &&
-	    dev_priv->cfb_fb == fb->base.id &&
-	    dev_priv->cfb_y == crtc->y)
+	if (dev_priv->fbc.plane == intel_crtc->plane &&
+	    dev_priv->fbc.fb_id == fb->base.id &&
+	    dev_priv->fbc.y == crtc->y)
 		return;
 
 	if (intel_fbc_enabled(dev)) {
@@ -588,6 +601,7 @@
 	}
 
 	intel_enable_fbc(crtc, 500);
+	dev_priv->fbc.no_fbc_reason = FBC_OK;
 	return;
 
 out_disable:
@@ -1666,9 +1680,6 @@
 	I915_WRITE(FW_BLC, fwater_lo);
 }
 
-#define ILK_LP0_PLANE_LATENCY		700
-#define ILK_LP0_CURSOR_LATENCY		1300
-
 /*
  * Check the wm result.
  *
@@ -1783,9 +1794,9 @@
 	enabled = 0;
 	if (g4x_compute_wm0(dev, PIPE_A,
 			    &ironlake_display_wm_info,
-			    ILK_LP0_PLANE_LATENCY,
+			    dev_priv->wm.pri_latency[0] * 100,
 			    &ironlake_cursor_wm_info,
-			    ILK_LP0_CURSOR_LATENCY,
+			    dev_priv->wm.cur_latency[0] * 100,
 			    &plane_wm, &cursor_wm)) {
 		I915_WRITE(WM0_PIPEA_ILK,
 			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
@@ -1797,9 +1808,9 @@
 
 	if (g4x_compute_wm0(dev, PIPE_B,
 			    &ironlake_display_wm_info,
-			    ILK_LP0_PLANE_LATENCY,
+			    dev_priv->wm.pri_latency[0] * 100,
 			    &ironlake_cursor_wm_info,
-			    ILK_LP0_CURSOR_LATENCY,
+			    dev_priv->wm.cur_latency[0] * 100,
 			    &plane_wm, &cursor_wm)) {
 		I915_WRITE(WM0_PIPEB_ILK,
 			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
@@ -1823,7 +1834,7 @@
 
 	/* WM1 */
 	if (!ironlake_compute_srwm(dev, 1, enabled,
-				   ILK_READ_WM1_LATENCY() * 500,
+				   dev_priv->wm.pri_latency[1] * 500,
 				   &ironlake_display_srwm_info,
 				   &ironlake_cursor_srwm_info,
 				   &fbc_wm, &plane_wm, &cursor_wm))
@@ -1831,14 +1842,14 @@
 
 	I915_WRITE(WM1_LP_ILK,
 		   WM1_LP_SR_EN |
-		   (ILK_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
 
 	/* WM2 */
 	if (!ironlake_compute_srwm(dev, 2, enabled,
-				   ILK_READ_WM2_LATENCY() * 500,
+				   dev_priv->wm.pri_latency[2] * 500,
 				   &ironlake_display_srwm_info,
 				   &ironlake_cursor_srwm_info,
 				   &fbc_wm, &plane_wm, &cursor_wm))
@@ -1846,7 +1857,7 @@
 
 	I915_WRITE(WM2_LP_ILK,
 		   WM2_LP_EN |
-		   (ILK_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
@@ -1860,7 +1871,7 @@
 static void sandybridge_update_wm(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
+	int latency = dev_priv->wm.pri_latency[0] * 100;	/* In unit 0.1us */
 	u32 val;
 	int fbc_wm, plane_wm, cursor_wm;
 	unsigned int enabled;
@@ -1915,7 +1926,7 @@
 
 	/* WM1 */
 	if (!ironlake_compute_srwm(dev, 1, enabled,
-				   SNB_READ_WM1_LATENCY() * 500,
+				   dev_priv->wm.pri_latency[1] * 500,
 				   &sandybridge_display_srwm_info,
 				   &sandybridge_cursor_srwm_info,
 				   &fbc_wm, &plane_wm, &cursor_wm))
@@ -1923,14 +1934,14 @@
 
 	I915_WRITE(WM1_LP_ILK,
 		   WM1_LP_SR_EN |
-		   (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
 
 	/* WM2 */
 	if (!ironlake_compute_srwm(dev, 2, enabled,
-				   SNB_READ_WM2_LATENCY() * 500,
+				   dev_priv->wm.pri_latency[2] * 500,
 				   &sandybridge_display_srwm_info,
 				   &sandybridge_cursor_srwm_info,
 				   &fbc_wm, &plane_wm, &cursor_wm))
@@ -1938,14 +1949,14 @@
 
 	I915_WRITE(WM2_LP_ILK,
 		   WM2_LP_EN |
-		   (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
 
 	/* WM3 */
 	if (!ironlake_compute_srwm(dev, 3, enabled,
-				   SNB_READ_WM3_LATENCY() * 500,
+				   dev_priv->wm.pri_latency[3] * 500,
 				   &sandybridge_display_srwm_info,
 				   &sandybridge_cursor_srwm_info,
 				   &fbc_wm, &plane_wm, &cursor_wm))
@@ -1953,7 +1964,7 @@
 
 	I915_WRITE(WM3_LP_ILK,
 		   WM3_LP_EN |
-		   (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (dev_priv->wm.pri_latency[3] << WM1_LP_LATENCY_SHIFT) |
 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
@@ -1962,7 +1973,7 @@
 static void ivybridge_update_wm(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
+	int latency = dev_priv->wm.pri_latency[0] * 100;	/* In unit 0.1us */
 	u32 val;
 	int fbc_wm, plane_wm, cursor_wm;
 	int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm;
@@ -2032,7 +2043,7 @@
 
 	/* WM1 */
 	if (!ironlake_compute_srwm(dev, 1, enabled,
-				   SNB_READ_WM1_LATENCY() * 500,
+				   dev_priv->wm.pri_latency[1] * 500,
 				   &sandybridge_display_srwm_info,
 				   &sandybridge_cursor_srwm_info,
 				   &fbc_wm, &plane_wm, &cursor_wm))
@@ -2040,14 +2051,14 @@
 
 	I915_WRITE(WM1_LP_ILK,
 		   WM1_LP_SR_EN |
-		   (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (dev_priv->wm.pri_latency[1] << WM1_LP_LATENCY_SHIFT) |
 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
 
 	/* WM2 */
 	if (!ironlake_compute_srwm(dev, 2, enabled,
-				   SNB_READ_WM2_LATENCY() * 500,
+				   dev_priv->wm.pri_latency[2] * 500,
 				   &sandybridge_display_srwm_info,
 				   &sandybridge_cursor_srwm_info,
 				   &fbc_wm, &plane_wm, &cursor_wm))
@@ -2055,19 +2066,19 @@
 
 	I915_WRITE(WM2_LP_ILK,
 		   WM2_LP_EN |
-		   (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (dev_priv->wm.pri_latency[2] << WM1_LP_LATENCY_SHIFT) |
 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
 
 	/* WM3, note we have to correct the cursor latency */
 	if (!ironlake_compute_srwm(dev, 3, enabled,
-				   SNB_READ_WM3_LATENCY() * 500,
+				   dev_priv->wm.pri_latency[3] * 500,
 				   &sandybridge_display_srwm_info,
 				   &sandybridge_cursor_srwm_info,
 				   &fbc_wm, &plane_wm, &ignore_cursor_wm) ||
 	    !ironlake_compute_srwm(dev, 3, enabled,
-				   2 * SNB_READ_WM3_LATENCY() * 500,
+				   dev_priv->wm.cur_latency[3] * 500,
 				   &sandybridge_display_srwm_info,
 				   &sandybridge_cursor_srwm_info,
 				   &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm))
@@ -2075,14 +2086,14 @@
 
 	I915_WRITE(WM3_LP_ILK,
 		   WM3_LP_EN |
-		   (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (dev_priv->wm.pri_latency[3] << WM1_LP_LATENCY_SHIFT) |
 		   (fbc_wm << WM1_LP_FBC_SHIFT) |
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
 }
 
-static uint32_t hsw_wm_get_pixel_rate(struct drm_device *dev,
-				      struct drm_crtc *crtc)
+static uint32_t ilk_pipe_pixel_rate(struct drm_device *dev,
+				    struct drm_crtc *crtc)
 {
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	uint32_t pixel_rate, pfit_size;
@@ -2112,30 +2123,38 @@
 	return pixel_rate;
 }
 
-static uint32_t hsw_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
+/* latency must be in 0.1us units. */
+static uint32_t ilk_wm_method1(uint32_t pixel_rate, uint8_t bytes_per_pixel,
 			       uint32_t latency)
 {
 	uint64_t ret;
 
+	if (WARN(latency == 0, "Latency value missing\n"))
+		return UINT_MAX;
+
 	ret = (uint64_t) pixel_rate * bytes_per_pixel * latency;
 	ret = DIV_ROUND_UP_ULL(ret, 64 * 10000) + 2;
 
 	return ret;
 }
 
-static uint32_t hsw_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
+/* latency must be in 0.1us units. */
+static uint32_t ilk_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
 			       uint32_t horiz_pixels, uint8_t bytes_per_pixel,
 			       uint32_t latency)
 {
 	uint32_t ret;
 
+	if (WARN(latency == 0, "Latency value missing\n"))
+		return UINT_MAX;
+
 	ret = (latency * pixel_rate) / (pipe_htotal * 10000);
 	ret = (ret + 1) * horiz_pixels * bytes_per_pixel;
 	ret = DIV_ROUND_UP(ret, 64) + 2;
 	return ret;
 }
 
-static uint32_t hsw_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
+static uint32_t ilk_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
 			   uint8_t bytes_per_pixel)
 {
 	return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
@@ -2143,15 +2162,11 @@
 
 struct hsw_pipe_wm_parameters {
 	bool active;
-	bool sprite_enabled;
-	uint8_t pri_bytes_per_pixel;
-	uint8_t spr_bytes_per_pixel;
-	uint8_t cur_bytes_per_pixel;
-	uint32_t pri_horiz_pixels;
-	uint32_t spr_horiz_pixels;
-	uint32_t cur_horiz_pixels;
 	uint32_t pipe_htotal;
 	uint32_t pixel_rate;
+	struct intel_plane_wm_parameters pri;
+	struct intel_plane_wm_parameters spr;
+	struct intel_plane_wm_parameters cur;
 };
 
 struct hsw_wm_maximums {
@@ -2161,15 +2176,6 @@
 	uint16_t fbc;
 };
 
-struct hsw_lp_wm_result {
-	bool enable;
-	bool fbc_enable;
-	uint32_t pri_val;
-	uint32_t spr_val;
-	uint32_t cur_val;
-	uint32_t fbc_val;
-};
-
 struct hsw_wm_values {
 	uint32_t wm_pipe[3];
 	uint32_t wm_lp[3];
@@ -2178,128 +2184,289 @@
 	bool enable_fbc_wm;
 };
 
-enum hsw_data_buf_partitioning {
-	HSW_DATA_BUF_PART_1_2,
-	HSW_DATA_BUF_PART_5_6,
+/* used in computing the new watermarks state */
+struct intel_wm_config {
+	unsigned int num_pipes_active;
+	bool sprites_enabled;
+	bool sprites_scaled;
+	bool fbc_wm_enabled;
 };
 
-/* For both WM_PIPE and WM_LP. */
-static uint32_t hsw_compute_pri_wm(struct hsw_pipe_wm_parameters *params,
+/*
+ * For both WM_PIPE and WM_LP.
+ * mem_value must be in 0.1us units.
+ */
+static uint32_t ilk_compute_pri_wm(struct hsw_pipe_wm_parameters *params,
 				   uint32_t mem_value,
 				   bool is_lp)
 {
 	uint32_t method1, method2;
 
-	/* TODO: for now, assume the primary plane is always enabled. */
-	if (!params->active)
+	if (!params->active || !params->pri.enabled)
 		return 0;
 
-	method1 = hsw_wm_method1(params->pixel_rate,
-				 params->pri_bytes_per_pixel,
+	method1 = ilk_wm_method1(params->pixel_rate,
+				 params->pri.bytes_per_pixel,
 				 mem_value);
 
 	if (!is_lp)
 		return method1;
 
-	method2 = hsw_wm_method2(params->pixel_rate,
+	method2 = ilk_wm_method2(params->pixel_rate,
 				 params->pipe_htotal,
-				 params->pri_horiz_pixels,
-				 params->pri_bytes_per_pixel,
+				 params->pri.horiz_pixels,
+				 params->pri.bytes_per_pixel,
 				 mem_value);
 
 	return min(method1, method2);
 }
 
-/* For both WM_PIPE and WM_LP. */
-static uint32_t hsw_compute_spr_wm(struct hsw_pipe_wm_parameters *params,
+/*
+ * For both WM_PIPE and WM_LP.
+ * mem_value must be in 0.1us units.
+ */
+static uint32_t ilk_compute_spr_wm(struct hsw_pipe_wm_parameters *params,
 				   uint32_t mem_value)
 {
 	uint32_t method1, method2;
 
-	if (!params->active || !params->sprite_enabled)
+	if (!params->active || !params->spr.enabled)
 		return 0;
 
-	method1 = hsw_wm_method1(params->pixel_rate,
-				 params->spr_bytes_per_pixel,
+	method1 = ilk_wm_method1(params->pixel_rate,
+				 params->spr.bytes_per_pixel,
 				 mem_value);
-	method2 = hsw_wm_method2(params->pixel_rate,
+	method2 = ilk_wm_method2(params->pixel_rate,
 				 params->pipe_htotal,
-				 params->spr_horiz_pixels,
-				 params->spr_bytes_per_pixel,
+				 params->spr.horiz_pixels,
+				 params->spr.bytes_per_pixel,
 				 mem_value);
 	return min(method1, method2);
 }
 
-/* For both WM_PIPE and WM_LP. */
-static uint32_t hsw_compute_cur_wm(struct hsw_pipe_wm_parameters *params,
+/*
+ * For both WM_PIPE and WM_LP.
+ * mem_value must be in 0.1us units.
+ */
+static uint32_t ilk_compute_cur_wm(struct hsw_pipe_wm_parameters *params,
 				   uint32_t mem_value)
 {
-	if (!params->active)
+	if (!params->active || !params->cur.enabled)
 		return 0;
 
-	return hsw_wm_method2(params->pixel_rate,
+	return ilk_wm_method2(params->pixel_rate,
 			      params->pipe_htotal,
-			      params->cur_horiz_pixels,
-			      params->cur_bytes_per_pixel,
+			      params->cur.horiz_pixels,
+			      params->cur.bytes_per_pixel,
 			      mem_value);
 }
 
 /* Only for WM_LP. */
-static uint32_t hsw_compute_fbc_wm(struct hsw_pipe_wm_parameters *params,
-				   uint32_t pri_val,
-				   uint32_t mem_value)
+static uint32_t ilk_compute_fbc_wm(struct hsw_pipe_wm_parameters *params,
+				   uint32_t pri_val)
 {
-	if (!params->active)
+	if (!params->active || !params->pri.enabled)
 		return 0;
 
-	return hsw_wm_fbc(pri_val,
-			  params->pri_horiz_pixels,
-			  params->pri_bytes_per_pixel);
+	return ilk_wm_fbc(pri_val,
+			  params->pri.horiz_pixels,
+			  params->pri.bytes_per_pixel);
 }
 
-static bool hsw_compute_lp_wm(uint32_t mem_value, struct hsw_wm_maximums *max,
-			      struct hsw_pipe_wm_parameters *params,
-			      struct hsw_lp_wm_result *result)
+static unsigned int ilk_display_fifo_size(const struct drm_device *dev)
 {
-	enum pipe pipe;
-	uint32_t pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
+	if (INTEL_INFO(dev)->gen >= 7)
+		return 768;
+	else
+		return 512;
+}
 
-	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
-		struct hsw_pipe_wm_parameters *p = &params[pipe];
+/* Calculate the maximum primary/sprite plane watermark */
+static unsigned int ilk_plane_wm_max(const struct drm_device *dev,
+				     int level,
+				     const struct intel_wm_config *config,
+				     enum intel_ddb_partitioning ddb_partitioning,
+				     bool is_sprite)
+{
+	unsigned int fifo_size = ilk_display_fifo_size(dev);
+	unsigned int max;
 
-		pri_val[pipe] = hsw_compute_pri_wm(p, mem_value, true);
-		spr_val[pipe] = hsw_compute_spr_wm(p, mem_value);
-		cur_val[pipe] = hsw_compute_cur_wm(p, mem_value);
-		fbc_val[pipe] = hsw_compute_fbc_wm(p, pri_val[pipe], mem_value);
+	/* if sprites aren't enabled, sprites get nothing */
+	if (is_sprite && !config->sprites_enabled)
+		return 0;
+
+	/* HSW allows LP1+ watermarks even with multiple pipes */
+	if (level == 0 || config->num_pipes_active > 1) {
+		fifo_size /= INTEL_INFO(dev)->num_pipes;
+
+		/*
+		 * For some reason the non self refresh
+		 * FIFO size is only half of the self
+		 * refresh FIFO size on ILK/SNB.
+		 */
+		if (INTEL_INFO(dev)->gen <= 6)
+			fifo_size /= 2;
 	}
 
-	result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
-	result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
-	result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
-	result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
-
-	if (result->fbc_val > max->fbc) {
-		result->fbc_enable = false;
-		result->fbc_val = 0;
-	} else {
-		result->fbc_enable = true;
+	if (config->sprites_enabled) {
+		/* level 0 is always calculated with 1:1 split */
+		if (level > 0 && ddb_partitioning == INTEL_DDB_PART_5_6) {
+			if (is_sprite)
+				fifo_size *= 5;
+			fifo_size /= 6;
+		} else {
+			fifo_size /= 2;
+		}
 	}
 
+	/* clamp to max that the registers can hold */
+	if (INTEL_INFO(dev)->gen >= 7)
+		/* IVB/HSW primary/sprite plane watermarks */
+		max = level == 0 ? 127 : 1023;
+	else if (!is_sprite)
+		/* ILK/SNB primary plane watermarks */
+		max = level == 0 ? 127 : 511;
+	else
+		/* ILK/SNB sprite plane watermarks */
+		max = level == 0 ? 63 : 255;
+
+	return min(fifo_size, max);
+}
+
+/* Calculate the maximum cursor plane watermark */
+static unsigned int ilk_cursor_wm_max(const struct drm_device *dev,
+				      int level,
+				      const struct intel_wm_config *config)
+{
+	/* HSW LP1+ watermarks w/ multiple pipes */
+	if (level > 0 && config->num_pipes_active > 1)
+		return 64;
+
+	/* otherwise just report max that registers can hold */
+	if (INTEL_INFO(dev)->gen >= 7)
+		return level == 0 ? 63 : 255;
+	else
+		return level == 0 ? 31 : 63;
+}
+
+/* Calculate the maximum FBC watermark */
+static unsigned int ilk_fbc_wm_max(void)
+{
+	/* max that registers can hold */
+	return 15;
+}
+
+static void ilk_wm_max(struct drm_device *dev,
+		       int level,
+		       const struct intel_wm_config *config,
+		       enum intel_ddb_partitioning ddb_partitioning,
+		       struct hsw_wm_maximums *max)
+{
+	max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false);
+	max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true);
+	max->cur = ilk_cursor_wm_max(dev, level, config);
+	max->fbc = ilk_fbc_wm_max();
+}
+
+static bool ilk_check_wm(int level,
+			 const struct hsw_wm_maximums *max,
+			 struct intel_wm_level *result)
+{
+	bool ret;
+
+	/* already determined to be invalid? */
+	if (!result->enable)
+		return false;
+
 	result->enable = result->pri_val <= max->pri &&
 			 result->spr_val <= max->spr &&
 			 result->cur_val <= max->cur;
-	return result->enable;
+
+	ret = result->enable;
+
+	/*
+	 * HACK until we can pre-compute everything,
+	 * and thus fail gracefully if LP0 watermarks
+	 * are exceeded...
+	 */
+	if (level == 0 && !result->enable) {
+		if (result->pri_val > max->pri)
+			DRM_DEBUG_KMS("Primary WM%d too large %u (max %u)\n",
+				      level, result->pri_val, max->pri);
+		if (result->spr_val > max->spr)
+			DRM_DEBUG_KMS("Sprite WM%d too large %u (max %u)\n",
+				      level, result->spr_val, max->spr);
+		if (result->cur_val > max->cur)
+			DRM_DEBUG_KMS("Cursor WM%d too large %u (max %u)\n",
+				      level, result->cur_val, max->cur);
+
+		result->pri_val = min_t(uint32_t, result->pri_val, max->pri);
+		result->spr_val = min_t(uint32_t, result->spr_val, max->spr);
+		result->cur_val = min_t(uint32_t, result->cur_val, max->cur);
+		result->enable = true;
+	}
+
+	DRM_DEBUG_KMS("WM%d: %sabled\n", level, result->enable ? "en" : "dis");
+
+	return ret;
+}
+
+static void ilk_compute_wm_level(struct drm_i915_private *dev_priv,
+				 int level,
+				 struct hsw_pipe_wm_parameters *p,
+				 struct intel_wm_level *result)
+{
+	uint16_t pri_latency = dev_priv->wm.pri_latency[level];
+	uint16_t spr_latency = dev_priv->wm.spr_latency[level];
+	uint16_t cur_latency = dev_priv->wm.cur_latency[level];
+
+	/* WM1+ latency values stored in 0.5us units */
+	if (level > 0) {
+		pri_latency *= 5;
+		spr_latency *= 5;
+		cur_latency *= 5;
+	}
+
+	result->pri_val = ilk_compute_pri_wm(p, pri_latency, level);
+	result->spr_val = ilk_compute_spr_wm(p, spr_latency);
+	result->cur_val = ilk_compute_cur_wm(p, cur_latency);
+	result->fbc_val = ilk_compute_fbc_wm(p, result->pri_val);
+	result->enable = true;
+}
+
+static bool hsw_compute_lp_wm(struct drm_i915_private *dev_priv,
+			      int level, struct hsw_wm_maximums *max,
+			      struct hsw_pipe_wm_parameters *params,
+			      struct intel_wm_level *result)
+{
+	enum pipe pipe;
+	struct intel_wm_level res[3];
+
+	for (pipe = PIPE_A; pipe <= PIPE_C; pipe++)
+		ilk_compute_wm_level(dev_priv, level, &params[pipe], &res[pipe]);
+
+	result->pri_val = max3(res[0].pri_val, res[1].pri_val, res[2].pri_val);
+	result->spr_val = max3(res[0].spr_val, res[1].spr_val, res[2].spr_val);
+	result->cur_val = max3(res[0].cur_val, res[1].cur_val, res[2].cur_val);
+	result->fbc_val = max3(res[0].fbc_val, res[1].fbc_val, res[2].fbc_val);
+	result->enable = true;
+
+	return ilk_check_wm(level, max, result);
 }
 
 static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
-				    uint32_t mem_value, enum pipe pipe,
+				    enum pipe pipe,
 				    struct hsw_pipe_wm_parameters *params)
 {
 	uint32_t pri_val, cur_val, spr_val;
+	/* WM0 latency values stored in 0.1us units */
+	uint16_t pri_latency = dev_priv->wm.pri_latency[0];
+	uint16_t spr_latency = dev_priv->wm.spr_latency[0];
+	uint16_t cur_latency = dev_priv->wm.cur_latency[0];
 
-	pri_val = hsw_compute_pri_wm(params, mem_value, false);
-	spr_val = hsw_compute_spr_wm(params, mem_value);
-	cur_val = hsw_compute_cur_wm(params, mem_value);
+	pri_val = ilk_compute_pri_wm(params, pri_latency, false);
+	spr_val = ilk_compute_spr_wm(params, spr_latency);
+	cur_val = ilk_compute_cur_wm(params, cur_latency);
 
 	WARN(pri_val > 127,
 	     "Primary WM error, mode not supported for pipe %c\n",
@@ -2338,27 +2505,116 @@
 	       PIPE_WM_LINETIME_TIME(linetime);
 }
 
+static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[5])
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (IS_HASWELL(dev)) {
+		uint64_t sskpd = I915_READ64(MCH_SSKPD);
+
+		wm[0] = (sskpd >> 56) & 0xFF;
+		if (wm[0] == 0)
+			wm[0] = sskpd & 0xF;
+		wm[1] = (sskpd >> 4) & 0xFF;
+		wm[2] = (sskpd >> 12) & 0xFF;
+		wm[3] = (sskpd >> 20) & 0x1FF;
+		wm[4] = (sskpd >> 32) & 0x1FF;
+	} else if (INTEL_INFO(dev)->gen >= 6) {
+		uint32_t sskpd = I915_READ(MCH_SSKPD);
+
+		wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK;
+		wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK;
+		wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK;
+		wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK;
+	} else if (INTEL_INFO(dev)->gen >= 5) {
+		uint32_t mltr = I915_READ(MLTR_ILK);
+
+		/* ILK primary LP0 latency is 700 ns */
+		wm[0] = 7;
+		wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK;
+		wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK;
+	}
+}
+
+static void intel_fixup_spr_wm_latency(struct drm_device *dev, uint16_t wm[5])
+{
+	/* ILK sprite LP0 latency is 1300 ns */
+	if (INTEL_INFO(dev)->gen == 5)
+		wm[0] = 13;
+}
+
+static void intel_fixup_cur_wm_latency(struct drm_device *dev, uint16_t wm[5])
+{
+	/* ILK cursor LP0 latency is 1300 ns */
+	if (INTEL_INFO(dev)->gen == 5)
+		wm[0] = 13;
+
+	/* WaDoubleCursorLP3Latency:ivb */
+	if (IS_IVYBRIDGE(dev))
+		wm[3] *= 2;
+}
+
+static void intel_print_wm_latency(struct drm_device *dev,
+				   const char *name,
+				   const uint16_t wm[5])
+{
+	int level, max_level;
+
+	/* how many WM levels are we expecting */
+	if (IS_HASWELL(dev))
+		max_level = 4;
+	else if (INTEL_INFO(dev)->gen >= 6)
+		max_level = 3;
+	else
+		max_level = 2;
+
+	for (level = 0; level <= max_level; level++) {
+		unsigned int latency = wm[level];
+
+		if (latency == 0) {
+			DRM_ERROR("%s WM%d latency not provided\n",
+				  name, level);
+			continue;
+		}
+
+		/* WM1+ latency values in 0.5us units */
+		if (level > 0)
+			latency *= 5;
+
+		DRM_DEBUG_KMS("%s WM%d latency %u (%u.%u usec)\n",
+			      name, level, wm[level],
+			      latency / 10, latency % 10);
+	}
+}
+
+static void intel_setup_wm_latency(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	intel_read_wm_latency(dev, dev_priv->wm.pri_latency);
+
+	memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
+	       sizeof(dev_priv->wm.pri_latency));
+	memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
+	       sizeof(dev_priv->wm.pri_latency));
+
+	intel_fixup_spr_wm_latency(dev, dev_priv->wm.spr_latency);
+	intel_fixup_cur_wm_latency(dev, dev_priv->wm.cur_latency);
+
+	intel_print_wm_latency(dev, "Primary", dev_priv->wm.pri_latency);
+	intel_print_wm_latency(dev, "Sprite", dev_priv->wm.spr_latency);
+	intel_print_wm_latency(dev, "Cursor", dev_priv->wm.cur_latency);
+}
+
 static void hsw_compute_wm_parameters(struct drm_device *dev,
 				      struct hsw_pipe_wm_parameters *params,
-				      uint32_t *wm,
 				      struct hsw_wm_maximums *lp_max_1_2,
 				      struct hsw_wm_maximums *lp_max_5_6)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
 	struct drm_plane *plane;
-	uint64_t sskpd = I915_READ64(MCH_SSKPD);
 	enum pipe pipe;
-	int pipes_active = 0, sprites_enabled = 0;
-
-	if ((sskpd >> 56) & 0xFF)
-		wm[0] = (sskpd >> 56) & 0xFF;
-	else
-		wm[0] = sskpd & 0xF;
-	wm[1] = ((sskpd >> 4) & 0xFF) * 5;
-	wm[2] = ((sskpd >> 12) & 0xFF) * 5;
-	wm[3] = ((sskpd >> 20) & 0x1FF) * 5;
-	wm[4] = ((sskpd >> 32) & 0x1FF) * 5;
+	struct intel_wm_config config = {};
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -2371,15 +2627,18 @@
 		if (!p->active)
 			continue;
 
-		pipes_active++;
+		config.num_pipes_active++;
 
 		p->pipe_htotal = intel_crtc->config.adjusted_mode.htotal;
-		p->pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
-		p->pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
-		p->cur_bytes_per_pixel = 4;
-		p->pri_horiz_pixels =
+		p->pixel_rate = ilk_pipe_pixel_rate(dev, crtc);
+		p->pri.bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
+		p->cur.bytes_per_pixel = 4;
+		p->pri.horiz_pixels =
 			intel_crtc->config.requested_mode.hdisplay;
-		p->cur_horiz_pixels = 64;
+		p->cur.horiz_pixels = 64;
+		/* TODO: for now, assume primary and cursor planes are always enabled. */
+		p->pri.enabled = true;
+		p->cur.enabled = true;
 	}
 
 	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
@@ -2389,59 +2648,53 @@
 		pipe = intel_plane->pipe;
 		p = &params[pipe];
 
-		p->sprite_enabled = intel_plane->wm.enable;
-		p->spr_bytes_per_pixel = intel_plane->wm.bytes_per_pixel;
-		p->spr_horiz_pixels = intel_plane->wm.horiz_pixels;
+		p->spr = intel_plane->wm;
 
-		if (p->sprite_enabled)
-			sprites_enabled++;
+		config.sprites_enabled |= p->spr.enabled;
+		config.sprites_scaled |= p->spr.scaled;
 	}
 
-	if (pipes_active > 1) {
-		lp_max_1_2->pri = lp_max_5_6->pri = sprites_enabled ? 128 : 256;
-		lp_max_1_2->spr = lp_max_5_6->spr = 128;
-		lp_max_1_2->cur = lp_max_5_6->cur = 64;
-	} else {
-		lp_max_1_2->pri = sprites_enabled ? 384 : 768;
-		lp_max_5_6->pri = sprites_enabled ? 128 : 768;
-		lp_max_1_2->spr = 384;
-		lp_max_5_6->spr = 640;
-		lp_max_1_2->cur = lp_max_5_6->cur = 255;
-	}
-	lp_max_1_2->fbc = lp_max_5_6->fbc = 15;
+	ilk_wm_max(dev, 1, &config, INTEL_DDB_PART_1_2, lp_max_1_2);
+
+	/* 5/6 split only in single pipe config on IVB+ */
+	if (INTEL_INFO(dev)->gen >= 7 && config.num_pipes_active <= 1)
+		ilk_wm_max(dev, 1, &config, INTEL_DDB_PART_5_6, lp_max_5_6);
+	else
+		*lp_max_5_6 = *lp_max_1_2;
 }
 
 static void hsw_compute_wm_results(struct drm_device *dev,
 				   struct hsw_pipe_wm_parameters *params,
-				   uint32_t *wm,
 				   struct hsw_wm_maximums *lp_maximums,
 				   struct hsw_wm_values *results)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_crtc *crtc;
-	struct hsw_lp_wm_result lp_results[4] = {};
+	struct intel_wm_level lp_results[4] = {};
 	enum pipe pipe;
 	int level, max_level, wm_lp;
 
 	for (level = 1; level <= 4; level++)
-		if (!hsw_compute_lp_wm(wm[level], lp_maximums, params,
+		if (!hsw_compute_lp_wm(dev_priv, level,
+				       lp_maximums, params,
 				       &lp_results[level - 1]))
 			break;
 	max_level = level - 1;
 
+	memset(results, 0, sizeof(*results));
+
 	/* The spec says it is preferred to disable FBC WMs instead of disabling
 	 * a WM level. */
 	results->enable_fbc_wm = true;
 	for (level = 1; level <= max_level; level++) {
-		if (!lp_results[level - 1].fbc_enable) {
+		if (lp_results[level - 1].fbc_val > lp_maximums->fbc) {
 			results->enable_fbc_wm = false;
-			break;
+			lp_results[level - 1].fbc_val = 0;
 		}
 	}
 
-	memset(results, 0, sizeof(*results));
 	for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
-		const struct hsw_lp_wm_result *r;
+		const struct intel_wm_level *r;
 
 		level = (max_level == 4 && wm_lp > 1) ? wm_lp + 1 : wm_lp;
 		if (level > max_level)
@@ -2456,8 +2709,7 @@
 	}
 
 	for_each_pipe(pipe)
-		results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
-							     pipe,
+		results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, pipe,
 							     &params[pipe]);
 
 	for_each_pipe(pipe) {
@@ -2468,8 +2720,8 @@
 
 /* Find the result with the highest level enabled. Check for enable_fbc_wm in
  * case both are at the same level. Prefer r1 in case they're the same. */
-struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
-					   struct hsw_wm_values *r2)
+static struct hsw_wm_values *hsw_find_best_result(struct hsw_wm_values *r1,
+						  struct hsw_wm_values *r2)
 {
 	int i, val_r1 = 0, val_r2 = 0;
 
@@ -2498,11 +2750,11 @@
  */
 static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
 				struct hsw_wm_values *results,
-				enum hsw_data_buf_partitioning partitioning)
+				enum intel_ddb_partitioning partitioning)
 {
 	struct hsw_wm_values previous;
 	uint32_t val;
-	enum hsw_data_buf_partitioning prev_partitioning;
+	enum intel_ddb_partitioning prev_partitioning;
 	bool prev_enable_fbc_wm;
 
 	previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
@@ -2519,7 +2771,7 @@
 	previous.wm_linetime[2] = I915_READ(PIPE_WM_LINETIME(PIPE_C));
 
 	prev_partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
-			    HSW_DATA_BUF_PART_5_6 : HSW_DATA_BUF_PART_1_2;
+				INTEL_DDB_PART_5_6 : INTEL_DDB_PART_1_2;
 
 	prev_enable_fbc_wm = !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
 
@@ -2558,7 +2810,7 @@
 
 	if (prev_partitioning != partitioning) {
 		val = I915_READ(WM_MISC);
-		if (partitioning == HSW_DATA_BUF_PART_1_2)
+		if (partitioning == INTEL_DDB_PART_1_2)
 			val &= ~WM_MISC_DATA_PARTITION_5_6;
 		else
 			val |= WM_MISC_DATA_PARTITION_5_6;
@@ -2595,44 +2847,39 @@
 	struct hsw_wm_maximums lp_max_1_2, lp_max_5_6;
 	struct hsw_pipe_wm_parameters params[3];
 	struct hsw_wm_values results_1_2, results_5_6, *best_results;
-	uint32_t wm[5];
-	enum hsw_data_buf_partitioning partitioning;
+	enum intel_ddb_partitioning partitioning;
 
-	hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2, &lp_max_5_6);
+	hsw_compute_wm_parameters(dev, params, &lp_max_1_2, &lp_max_5_6);
 
-	hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results_1_2);
+	hsw_compute_wm_results(dev, params,
+			       &lp_max_1_2, &results_1_2);
 	if (lp_max_1_2.pri != lp_max_5_6.pri) {
-		hsw_compute_wm_results(dev, params, wm, &lp_max_5_6,
-				       &results_5_6);
+		hsw_compute_wm_results(dev, params,
+				       &lp_max_5_6, &results_5_6);
 		best_results = hsw_find_best_result(&results_1_2, &results_5_6);
 	} else {
 		best_results = &results_1_2;
 	}
 
 	partitioning = (best_results == &results_1_2) ?
-		       HSW_DATA_BUF_PART_1_2 : HSW_DATA_BUF_PART_5_6;
+		       INTEL_DDB_PART_1_2 : INTEL_DDB_PART_5_6;
 
 	hsw_write_wm_values(dev_priv, best_results, partitioning);
 }
 
-static void haswell_update_sprite_wm(struct drm_device *dev, int pipe,
+static void haswell_update_sprite_wm(struct drm_plane *plane,
+				     struct drm_crtc *crtc,
 				     uint32_t sprite_width, int pixel_size,
-				     bool enable)
+				     bool enabled, bool scaled)
 {
-	struct drm_plane *plane;
+	struct intel_plane *intel_plane = to_intel_plane(plane);
 
-	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
-		struct intel_plane *intel_plane = to_intel_plane(plane);
+	intel_plane->wm.enabled = enabled;
+	intel_plane->wm.scaled = scaled;
+	intel_plane->wm.horiz_pixels = sprite_width;
+	intel_plane->wm.bytes_per_pixel = pixel_size;
 
-		if (intel_plane->pipe == pipe) {
-			intel_plane->wm.enable = enable;
-			intel_plane->wm.horiz_pixels = sprite_width + 1;
-			intel_plane->wm.bytes_per_pixel = pixel_size;
-			break;
-		}
-	}
-
-	haswell_update_wm(dev);
+	haswell_update_wm(plane->dev);
 }
 
 static bool
@@ -2711,17 +2958,20 @@
 	return *sprite_wm > 0x3ff ? false : true;
 }
 
-static void sandybridge_update_sprite_wm(struct drm_device *dev, int pipe,
+static void sandybridge_update_sprite_wm(struct drm_plane *plane,
+					 struct drm_crtc *crtc,
 					 uint32_t sprite_width, int pixel_size,
-					 bool enable)
+					 bool enabled, bool scaled)
 {
+	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
+	int pipe = to_intel_plane(plane)->pipe;
+	int latency = dev_priv->wm.spr_latency[0] * 100;	/* In unit 0.1us */
 	u32 val;
 	int sprite_wm, reg;
 	int ret;
 
-	if (!enable)
+	if (!enabled)
 		return;
 
 	switch (pipe) {
@@ -2756,7 +3006,7 @@
 	ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
 					      pixel_size,
 					      &sandybridge_display_srwm_info,
-					      SNB_READ_WM1_LATENCY() * 500,
+					      dev_priv->wm.spr_latency[1] * 500,
 					      &sprite_wm);
 	if (!ret) {
 		DRM_DEBUG_KMS("failed to compute sprite lp1 wm on pipe %c\n",
@@ -2772,7 +3022,7 @@
 	ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
 					      pixel_size,
 					      &sandybridge_display_srwm_info,
-					      SNB_READ_WM2_LATENCY() * 500,
+					      dev_priv->wm.spr_latency[2] * 500,
 					      &sprite_wm);
 	if (!ret) {
 		DRM_DEBUG_KMS("failed to compute sprite lp2 wm on pipe %c\n",
@@ -2784,7 +3034,7 @@
 	ret = sandybridge_compute_sprite_srwm(dev, pipe, sprite_width,
 					      pixel_size,
 					      &sandybridge_display_srwm_info,
-					      SNB_READ_WM3_LATENCY() * 500,
+					      dev_priv->wm.spr_latency[3] * 500,
 					      &sprite_wm);
 	if (!ret) {
 		DRM_DEBUG_KMS("failed to compute sprite lp3 wm on pipe %c\n",
@@ -2834,15 +3084,16 @@
 		dev_priv->display.update_wm(dev);
 }
 
-void intel_update_sprite_watermarks(struct drm_device *dev, int pipe,
+void intel_update_sprite_watermarks(struct drm_plane *plane,
+				    struct drm_crtc *crtc,
 				    uint32_t sprite_width, int pixel_size,
-				    bool enable)
+				    bool enabled, bool scaled)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_private *dev_priv = plane->dev->dev_private;
 
 	if (dev_priv->display.update_sprite_wm)
-		dev_priv->display.update_sprite_wm(dev, pipe, sprite_width,
-						   pixel_size, enable);
+		dev_priv->display.update_sprite_wm(plane, crtc, sprite_width,
+						   pixel_size, enabled, scaled);
 }
 
 static struct drm_i915_gem_object *
@@ -2859,7 +3110,7 @@
 		return NULL;
 	}
 
-	ret = i915_gem_object_pin(ctx, 4096, true, false);
+	ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
 	if (ret) {
 		DRM_ERROR("failed to pin power context: %d\n", ret);
 		goto err_unref;
@@ -3076,19 +3327,12 @@
  */
 static void vlv_update_rps_cur_delay(struct drm_i915_private *dev_priv)
 {
-	unsigned long timeout = jiffies + msecs_to_jiffies(10);
 	u32 pval;
 
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
 
-	do {
-		pval = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS);
-		if (time_after(jiffies, timeout)) {
-			DRM_DEBUG_DRIVER("timed out waiting for Punit\n");
-			break;
-		}
-		udelay(10);
-	} while (pval & 1);
+	if (wait_for(((pval = vlv_punit_read(dev_priv, PUNIT_REG_GPU_FREQ_STS)) & GENFREQSTATUS) == 0, 10))
+		DRM_DEBUG_DRIVER("timed out waiting for Punit\n");
 
 	pval >>= 8;
 
@@ -3129,13 +3373,10 @@
 	trace_intel_gpu_freq_change(vlv_gpu_freq(dev_priv->mem_freq, val));
 }
 
-
-static void gen6_disable_rps(struct drm_device *dev)
+static void gen6_disable_rps_interrupts(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	I915_WRITE(GEN6_RC_CONTROL, 0);
-	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) & ~GEN6_PM_RPS_EVENTS);
 	/* Complete PM interrupt masking here doesn't race with the rps work
@@ -3143,30 +3384,30 @@
 	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
 	 * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
 
-	spin_lock_irq(&dev_priv->rps.lock);
+	spin_lock_irq(&dev_priv->irq_lock);
 	dev_priv->rps.pm_iir = 0;
-	spin_unlock_irq(&dev_priv->rps.lock);
+	spin_unlock_irq(&dev_priv->irq_lock);
 
 	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
 }
 
+static void gen6_disable_rps(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(GEN6_RC_CONTROL, 0);
+	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
+
+	gen6_disable_rps_interrupts(dev);
+}
+
 static void valleyview_disable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	I915_WRITE(GEN6_RC_CONTROL, 0);
-	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
-	I915_WRITE(GEN6_PMIER, 0);
-	/* Complete PM interrupt masking here doesn't race with the rps work
-	 * item again unmasking PM interrupts because that is using a different
-	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
-	 * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
 
-	spin_lock_irq(&dev_priv->rps.lock);
-	dev_priv->rps.pm_iir = 0;
-	spin_unlock_irq(&dev_priv->rps.lock);
-
-	I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
+	gen6_disable_rps_interrupts(dev);
 
 	if (dev_priv->vlv_pctx) {
 		drm_gem_object_unreference(&dev_priv->vlv_pctx->base);
@@ -3176,6 +3417,10 @@
 
 int intel_enable_rc6(const struct drm_device *dev)
 {
+	/* No RC6 before Ironlake */
+	if (INTEL_INFO(dev)->gen < 5)
+		return 0;
+
 	/* Respect the kernel parameter if it is set */
 	if (i915_enable_rc6 >= 0)
 		return i915_enable_rc6;
@@ -3199,6 +3444,19 @@
 	return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
 }
 
+static void gen6_enable_rps_interrupts(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	spin_lock_irq(&dev_priv->irq_lock);
+	WARN_ON(dev_priv->rps.pm_iir);
+	snb_enable_pm_irq(dev_priv, GEN6_PM_RPS_EVENTS);
+	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
+	spin_unlock_irq(&dev_priv->irq_lock);
+	/* only unmask PM interrupts we need. Mask all others. */
+	I915_WRITE(GEN6_PMINTRMSK, ~GEN6_PM_RPS_EVENTS);
+}
+
 static void gen6_enable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -3250,7 +3508,10 @@
 
 	I915_WRITE(GEN6_RC_SLEEP, 0);
 	I915_WRITE(GEN6_RC1e_THRESHOLD, 1000);
-	I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
+	if (INTEL_INFO(dev)->gen <= 6 || IS_IVYBRIDGE(dev))
+		I915_WRITE(GEN6_RC6_THRESHOLD, 125000);
+	else
+		I915_WRITE(GEN6_RC6_THRESHOLD, 50000);
 	I915_WRITE(GEN6_RC6p_THRESHOLD, 150000);
 	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
 
@@ -3327,17 +3588,7 @@
 
 	gen6_set_rps(dev_priv->dev, (gt_perf_status & 0xff00) >> 8);
 
-	/* requires MSI enabled */
-	I915_WRITE(GEN6_PMIER, I915_READ(GEN6_PMIER) | GEN6_PM_RPS_EVENTS);
-	spin_lock_irq(&dev_priv->rps.lock);
-	/* FIXME: Our interrupt enabling sequence is bonghits.
-	 * dev_priv->rps.pm_iir really should be 0 here. */
-	dev_priv->rps.pm_iir = 0;
-	I915_WRITE(GEN6_PMIMR, I915_READ(GEN6_PMIMR) & ~GEN6_PM_RPS_EVENTS);
-	I915_WRITE(GEN6_PMIIR, GEN6_PM_RPS_EVENTS);
-	spin_unlock_irq(&dev_priv->rps.lock);
-	/* unmask all PM interrupts */
-	I915_WRITE(GEN6_PMINTRMSK, 0);
+	gen6_enable_rps_interrupts(dev);
 
 	rc6vids = 0;
 	ret = sandybridge_pcode_read(dev_priv, GEN6_PCODE_READ_RC6VIDS, &rc6vids);
@@ -3356,7 +3607,7 @@
 	gen6_gt_force_wake_put(dev_priv);
 }
 
-static void gen6_update_ring_freq(struct drm_device *dev)
+void gen6_update_ring_freq(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int min_freq = 15;
@@ -3482,7 +3733,7 @@
 		pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base;
 		pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv->dev,
 								      pcbr_offset,
-								      -1,
+								      I915_GTT_OFFSET_NONE,
 								      pctx_size);
 		goto out;
 	}
@@ -3607,14 +3858,7 @@
 
 	valleyview_set_rps(dev_priv->dev, dev_priv->rps.rpe_delay);
 
-	/* requires MSI enabled */
-	I915_WRITE(GEN6_PMIER, GEN6_PM_RPS_EVENTS);
-	spin_lock_irq(&dev_priv->rps.lock);
-	WARN_ON(dev_priv->rps.pm_iir != 0);
-	I915_WRITE(GEN6_PMIMR, 0);
-	spin_unlock_irq(&dev_priv->rps.lock);
-	/* enable all PM interrupts */
-	I915_WRITE(GEN6_PMINTRMSK, 0);
+	gen6_enable_rps_interrupts(dev);
 
 	gen6_gt_force_wake_put(dev_priv);
 }
@@ -3708,7 +3952,7 @@
 
 	intel_ring_emit(ring, MI_SUSPEND_FLUSH | MI_SUSPEND_FLUSH_EN);
 	intel_ring_emit(ring, MI_SET_CONTEXT);
-	intel_ring_emit(ring, dev_priv->ips.renderctx->gtt_offset |
+	intel_ring_emit(ring, i915_gem_obj_ggtt_offset(dev_priv->ips.renderctx) |
 			MI_MM_SPACE_GTT |
 			MI_SAVE_EXT_STATE_EN |
 			MI_RESTORE_EXT_STATE_EN |
@@ -3731,7 +3975,7 @@
 		return;
 	}
 
-	I915_WRITE(PWRCTXA, dev_priv->ips.pwrctx->gtt_offset | PWRCTX_EN);
+	I915_WRITE(PWRCTXA, i915_gem_obj_ggtt_offset(dev_priv->ips.pwrctx) | PWRCTX_EN);
 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
 }
 
@@ -4429,7 +4673,10 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE;
 
-	/* Required for FBC */
+	/*
+	 * Required for FBC
+	 * WaFbcDisableDpfcClockGating:ilk
+	 */
 	dspclk_gate |= ILK_DPFCRUNIT_CLOCK_GATE_DISABLE |
 		   ILK_DPFCUNIT_CLOCK_GATE_DISABLE |
 		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE;
@@ -4466,6 +4713,7 @@
 	 * The bit 7,8,9 of 0x42020.
 	 */
 	if (IS_IRONLAKE_M(dev)) {
+		/* WaFbcAsynchFlipDisableFbcQueue:ilk */
 		I915_WRITE(ILK_DISPLAY_CHICKEN1,
 			   I915_READ(ILK_DISPLAY_CHICKEN1) |
 			   ILK_FBCQ_DIS);
@@ -4602,6 +4850,8 @@
 	 * The bit5 and bit7 of 0x42020
 	 * The bit14 of 0x70180
 	 * The bit14 of 0x71180
+	 *
+	 * WaFbcAsynchFlipDisableFbcQueue:snb
 	 */
 	I915_WRITE(ILK_DISPLAY_CHICKEN1,
 		   I915_READ(ILK_DISPLAY_CHICKEN1) |
@@ -4614,10 +4864,6 @@
 		   ILK_DPARBUNIT_CLOCK_GATE_ENABLE  |
 		   ILK_DPFDUNIT_CLOCK_GATE_ENABLE);
 
-	/* WaMbcDriverBootEnable:snb */
-	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
-		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
-
 	g4x_disable_trickle_feed(dev);
 
 	/* The default value should be 0x200 according to docs, but the two
@@ -4713,10 +4959,6 @@
 	I915_WRITE(CACHE_MODE_1,
 		   _MASKED_BIT_ENABLE(PIXEL_SUBSPAN_COLLECT_OPT_DISABLE));
 
-	/* WaMbcDriverBootEnable:hsw */
-	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
-		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
-
 	/* WaSwitchSolVfFArbitrationPriority:hsw */
 	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL);
 
@@ -4800,10 +5042,6 @@
 
 	g4x_disable_trickle_feed(dev);
 
-	/* WaMbcDriverBootEnable:ivb */
-	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
-		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
-
 	/* WaVSRefCountFullforceMissDisable:ivb */
 	gen7_setup_fixed_func_scheduler(dev_priv);
 
@@ -4863,11 +5101,6 @@
 		   I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
 		   GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB);
 
-	/* WaMbcDriverBootEnable:vlv */
-	I915_WRITE(GEN6_MBCTL, I915_READ(GEN6_MBCTL) |
-		   GEN6_MBCTL_ENABLE_BOOT_FETCH);
-
-
 	/* According to the BSpec vol1g, bit 12 (RCPBUNIT) clock
 	 * gating disable must be set.  Failure to set it results in
 	 * flickering pixels due to Z write ordering failures after
@@ -5035,7 +5268,7 @@
 	case POWER_DOMAIN_TRANSCODER_B:
 	case POWER_DOMAIN_TRANSCODER_C:
 		return I915_READ(HSW_PWR_WELL_DRIVER) ==
-		       (HSW_PWR_WELL_ENABLE | HSW_PWR_WELL_STATE);
+		     (HSW_PWR_WELL_ENABLE_REQUEST | HSW_PWR_WELL_STATE_ENABLED);
 	default:
 		BUG();
 	}
@@ -5048,23 +5281,42 @@
 	uint32_t tmp;
 
 	tmp = I915_READ(HSW_PWR_WELL_DRIVER);
-	is_enabled = tmp & HSW_PWR_WELL_STATE;
-	enable_requested = tmp & HSW_PWR_WELL_ENABLE;
+	is_enabled = tmp & HSW_PWR_WELL_STATE_ENABLED;
+	enable_requested = tmp & HSW_PWR_WELL_ENABLE_REQUEST;
 
 	if (enable) {
 		if (!enable_requested)
-			I915_WRITE(HSW_PWR_WELL_DRIVER, HSW_PWR_WELL_ENABLE);
+			I915_WRITE(HSW_PWR_WELL_DRIVER,
+				   HSW_PWR_WELL_ENABLE_REQUEST);
 
 		if (!is_enabled) {
 			DRM_DEBUG_KMS("Enabling power well\n");
 			if (wait_for((I915_READ(HSW_PWR_WELL_DRIVER) &
-				      HSW_PWR_WELL_STATE), 20))
+				      HSW_PWR_WELL_STATE_ENABLED), 20))
 				DRM_ERROR("Timeout enabling power well\n");
 		}
 	} else {
 		if (enable_requested) {
+			unsigned long irqflags;
+			enum pipe p;
+
 			I915_WRITE(HSW_PWR_WELL_DRIVER, 0);
+			POSTING_READ(HSW_PWR_WELL_DRIVER);
 			DRM_DEBUG_KMS("Requesting to disable the power well\n");
+
+			/*
+			 * After this, the registers on the pipes that are part
+			 * of the power well will become zero, so we have to
+			 * adjust our counters according to that.
+			 *
+			 * FIXME: Should we do this in general in
+			 * drm_vblank_post_modeset?
+			 */
+			spin_lock_irqsave(&dev->vbl_lock, irqflags);
+			for_each_pipe(p)
+				if (p != PIPE_A)
+					dev->last_vblank[p] = 0;
+			spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 		}
 	}
 }
@@ -5160,10 +5412,21 @@
 
 	/* We're taking over the BIOS, so clear any requests made by it since
 	 * the driver is in charge now. */
-	if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE)
+	if (I915_READ(HSW_PWR_WELL_BIOS) & HSW_PWR_WELL_ENABLE_REQUEST)
 		I915_WRITE(HSW_PWR_WELL_BIOS, 0);
 }
 
+/* Disables PC8 so we can use the GMBUS and DP AUX interrupts. */
+void intel_aux_display_runtime_get(struct drm_i915_private *dev_priv)
+{
+	hsw_disable_package_c8(dev_priv);
+}
+
+void intel_aux_display_runtime_put(struct drm_i915_private *dev_priv)
+{
+	hsw_enable_package_c8(dev_priv);
+}
+
 /* Set up chip specific power management-related functions */
 void intel_init_pm(struct drm_device *dev)
 {
@@ -5199,8 +5462,12 @@
 
 	/* For FIFO watermark updates */
 	if (HAS_PCH_SPLIT(dev)) {
+		intel_setup_wm_latency(dev);
+
 		if (IS_GEN5(dev)) {
-			if (I915_READ(MLTR_ILK) & ILK_SRLT_MASK)
+			if (dev_priv->wm.pri_latency[1] &&
+			    dev_priv->wm.spr_latency[1] &&
+			    dev_priv->wm.cur_latency[1])
 				dev_priv->display.update_wm = ironlake_update_wm;
 			else {
 				DRM_DEBUG_KMS("Failed to get proper latency. "
@@ -5209,7 +5476,9 @@
 			}
 			dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
 		} else if (IS_GEN6(dev)) {
-			if (SNB_READ_WM0_LATENCY()) {
+			if (dev_priv->wm.pri_latency[0] &&
+			    dev_priv->wm.spr_latency[0] &&
+			    dev_priv->wm.cur_latency[0]) {
 				dev_priv->display.update_wm = sandybridge_update_wm;
 				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
 			} else {
@@ -5219,7 +5488,9 @@
 			}
 			dev_priv->display.init_clock_gating = gen6_init_clock_gating;
 		} else if (IS_IVYBRIDGE(dev)) {
-			if (SNB_READ_WM0_LATENCY()) {
+			if (dev_priv->wm.pri_latency[0] &&
+			    dev_priv->wm.spr_latency[0] &&
+			    dev_priv->wm.cur_latency[0]) {
 				dev_priv->display.update_wm = ivybridge_update_wm;
 				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
 			} else {
@@ -5229,7 +5500,9 @@
 			}
 			dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
 		} else if (IS_HASWELL(dev)) {
-			if (I915_READ64(MCH_SSKPD)) {
+			if (dev_priv->wm.pri_latency[0] &&
+			    dev_priv->wm.spr_latency[0] &&
+			    dev_priv->wm.cur_latency[0]) {
 				dev_priv->display.update_wm = haswell_update_wm;
 				dev_priv->display.update_sprite_wm =
 					haswell_update_sprite_wm;
@@ -5292,254 +5565,6 @@
 	}
 }
 
-static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
-{
-	u32 gt_thread_status_mask;
-
-	if (IS_HASWELL(dev_priv->dev))
-		gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK_HSW;
-	else
-		gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK;
-
-	/* w/a for a sporadic read returning 0 by waiting for the GT
-	 * thread to wake up.
-	 */
-	if (wait_for_atomic_us((I915_READ_NOTRACE(GEN6_GT_THREAD_STATUS_REG) & gt_thread_status_mask) == 0, 500))
-		DRM_ERROR("GT thread status wait timed out\n");
-}
-
-static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE_NOTRACE(FORCEWAKE, 0);
-	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
-}
-
-static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
-{
-	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1) == 0,
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
-
-	I915_WRITE_NOTRACE(FORCEWAKE, 1);
-	POSTING_READ(ECOBUS); /* something from same cacheline, but !FORCEWAKE */
-
-	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK) & 1),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
-
-	/* WaRsForcewakeWaitTC0:snb */
-	__gen6_gt_wait_for_thread_c0(dev_priv);
-}
-
-static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff));
-	/* something from same cacheline, but !FORCEWAKE_MT */
-	POSTING_READ(ECOBUS);
-}
-
-static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
-{
-	u32 forcewake_ack;
-
-	if (IS_HASWELL(dev_priv->dev))
-		forcewake_ack = FORCEWAKE_ACK_HSW;
-	else
-		forcewake_ack = FORCEWAKE_MT_ACK;
-
-	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL) == 0,
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
-
-	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
-	/* something from same cacheline, but !FORCEWAKE_MT */
-	POSTING_READ(ECOBUS);
-
-	if (wait_for_atomic((I915_READ_NOTRACE(forcewake_ack) & FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
-
-	/* WaRsForcewakeWaitTC0:ivb,hsw */
-	__gen6_gt_wait_for_thread_c0(dev_priv);
-}
-
-/*
- * Generally this is called implicitly by the register read function. However,
- * if some sequence requires the GT to not power down then this function should
- * be called at the beginning of the sequence followed by a call to
- * gen6_gt_force_wake_put() at the end of the sequence.
- */
-void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
-{
-	unsigned long irqflags;
-
-	spin_lock_irqsave(&dev_priv->gt_lock, irqflags);
-	if (dev_priv->forcewake_count++ == 0)
-		dev_priv->gt.force_wake_get(dev_priv);
-	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
-}
-
-void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv)
-{
-	u32 gtfifodbg;
-	gtfifodbg = I915_READ_NOTRACE(GTFIFODBG);
-	if (WARN(gtfifodbg & GT_FIFO_CPU_ERROR_MASK,
-	     "MMIO read or write has been dropped %x\n", gtfifodbg))
-		I915_WRITE_NOTRACE(GTFIFODBG, GT_FIFO_CPU_ERROR_MASK);
-}
-
-static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE_NOTRACE(FORCEWAKE, 0);
-	/* something from same cacheline, but !FORCEWAKE */
-	POSTING_READ(ECOBUS);
-	gen6_gt_check_fifodbg(dev_priv);
-}
-
-static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE_NOTRACE(FORCEWAKE_MT, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
-	/* something from same cacheline, but !FORCEWAKE_MT */
-	POSTING_READ(ECOBUS);
-	gen6_gt_check_fifodbg(dev_priv);
-}
-
-/*
- * see gen6_gt_force_wake_get()
- */
-void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
-{
-	unsigned long irqflags;
-
-	spin_lock_irqsave(&dev_priv->gt_lock, irqflags);
-	if (--dev_priv->forcewake_count == 0)
-		dev_priv->gt.force_wake_put(dev_priv);
-	spin_unlock_irqrestore(&dev_priv->gt_lock, irqflags);
-}
-
-int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
-{
-	int ret = 0;
-
-	if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
-		int loop = 500;
-		u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
-		while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
-			udelay(10);
-			fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
-		}
-		if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES))
-			++ret;
-		dev_priv->gt_fifo_count = fifo;
-	}
-	dev_priv->gt_fifo_count--;
-
-	return ret;
-}
-
-static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(0xffff));
-	/* something from same cacheline, but !FORCEWAKE_VLV */
-	POSTING_READ(FORCEWAKE_ACK_VLV);
-}
-
-static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
-{
-	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0,
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
-
-	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
-	I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
-			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
-
-	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");
-
-	if (wait_for_atomic((I915_READ_NOTRACE(FORCEWAKE_ACK_MEDIA_VLV) &
-			     FORCEWAKE_KERNEL),
-			    FORCEWAKE_ACK_TIMEOUT_MS))
-		DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");
-
-	/* WaRsForcewakeWaitTC0:vlv */
-	__gen6_gt_wait_for_thread_c0(dev_priv);
-}
-
-static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
-{
-	I915_WRITE_NOTRACE(FORCEWAKE_VLV, _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
-	I915_WRITE_NOTRACE(FORCEWAKE_MEDIA_VLV,
-			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
-	/* The below doubles as a POSTING_READ */
-	gen6_gt_check_fifodbg(dev_priv);
-}
-
-void intel_gt_reset(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (IS_VALLEYVIEW(dev)) {
-		vlv_force_wake_reset(dev_priv);
-	} else if (INTEL_INFO(dev)->gen >= 6) {
-		__gen6_gt_force_wake_reset(dev_priv);
-		if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
-			__gen6_gt_force_wake_mt_reset(dev_priv);
-	}
-}
-
-void intel_gt_init(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	spin_lock_init(&dev_priv->gt_lock);
-
-	intel_gt_reset(dev);
-
-	if (IS_VALLEYVIEW(dev)) {
-		dev_priv->gt.force_wake_get = vlv_force_wake_get;
-		dev_priv->gt.force_wake_put = vlv_force_wake_put;
-	} else if (IS_HASWELL(dev)) {
-		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_mt_get;
-		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_mt_put;
-	} else if (IS_IVYBRIDGE(dev)) {
-		u32 ecobus;
-
-		/* IVB configs may use multi-threaded forcewake */
-
-		/* A small trick here - if the bios hasn't configured
-		 * MT forcewake, and if the device is in RC6, then
-		 * force_wake_mt_get will not wake the device and the
-		 * ECOBUS read will return zero. Which will be
-		 * (correctly) interpreted by the test below as MT
-		 * forcewake being disabled.
-		 */
-		mutex_lock(&dev->struct_mutex);
-		__gen6_gt_force_wake_mt_get(dev_priv);
-		ecobus = I915_READ_NOTRACE(ECOBUS);
-		__gen6_gt_force_wake_mt_put(dev_priv);
-		mutex_unlock(&dev->struct_mutex);
-
-		if (ecobus & FORCEWAKE_MT_ENABLE) {
-			dev_priv->gt.force_wake_get =
-						__gen6_gt_force_wake_mt_get;
-			dev_priv->gt.force_wake_put =
-						__gen6_gt_force_wake_mt_put;
-		} else {
-			DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n");
-			DRM_INFO("when using vblank-synced partial screen updates.\n");
-			dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
-			dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
-		}
-	} else if (IS_GEN6(dev)) {
-		dev_priv->gt.force_wake_get = __gen6_gt_force_wake_get;
-		dev_priv->gt.force_wake_put = __gen6_gt_force_wake_put;
-	}
-	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
-			  intel_gen6_powersave_work);
-}
-
 int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u8 mbox, u32 *val)
 {
 	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));
@@ -5642,3 +5667,11 @@
 	return val;
 }
 
+void intel_pm_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work,
+			  intel_gen6_powersave_work);
+}
+
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 664118d..f05ccea 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -440,14 +440,14 @@
 	 * registers with the above sequence (the readback of the HEAD registers
 	 * also enforces ordering), otherwise the hw might lose the new ring
 	 * register values. */
-	I915_WRITE_START(ring, obj->gtt_offset);
+	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
 	I915_WRITE_CTL(ring,
 			((ring->size - PAGE_SIZE) & RING_NR_PAGES)
 			| RING_VALID);
 
 	/* If the head is still not zero, the ring is dead */
 	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
-		     I915_READ_START(ring) == obj->gtt_offset &&
+		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
 		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
 		DRM_ERROR("%s initialization failed "
 				"ctl %08x head %08x tail %08x start %08x\n",
@@ -501,11 +501,11 @@
 
 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
-	ret = i915_gem_object_pin(obj, 4096, true, false);
+	ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
 	if (ret)
 		goto err_unref;
 
-	pc->gtt_offset = obj->gtt_offset;
+	pc->gtt_offset = i915_gem_obj_ggtt_offset(obj);
 	pc->cpu_page = kmap(sg_page(obj->pages->sgl));
 	if (pc->cpu_page == NULL) {
 		ret = -ENOMEM;
@@ -836,11 +836,8 @@
 		return false;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount.gt++ == 0) {
-		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
-		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-		POSTING_READ(GTIMR);
-	}
+	if (ring->irq_refcount++ == 0)
+		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
 	return true;
@@ -854,11 +851,8 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount.gt == 0) {
-		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
-		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-		POSTING_READ(GTIMR);
-	}
+	if (--ring->irq_refcount == 0)
+		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 }
 
@@ -873,7 +867,7 @@
 		return false;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount.gt++ == 0) {
+	if (ring->irq_refcount++ == 0) {
 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
 		I915_WRITE(IMR, dev_priv->irq_mask);
 		POSTING_READ(IMR);
@@ -891,7 +885,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount.gt == 0) {
+	if (--ring->irq_refcount == 0) {
 		dev_priv->irq_mask |= ring->irq_enable_mask;
 		I915_WRITE(IMR, dev_priv->irq_mask);
 		POSTING_READ(IMR);
@@ -910,7 +904,7 @@
 		return false;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount.gt++ == 0) {
+	if (ring->irq_refcount++ == 0) {
 		dev_priv->irq_mask &= ~ring->irq_enable_mask;
 		I915_WRITE16(IMR, dev_priv->irq_mask);
 		POSTING_READ16(IMR);
@@ -928,7 +922,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount.gt == 0) {
+	if (--ring->irq_refcount == 0) {
 		dev_priv->irq_mask |= ring->irq_enable_mask;
 		I915_WRITE16(IMR, dev_priv->irq_mask);
 		POSTING_READ16(IMR);
@@ -968,6 +962,18 @@
 
 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
 	POSTING_READ(mmio);
+
+	/* Flush the TLB for this page */
+	if (INTEL_INFO(dev)->gen >= 6) {
+		u32 reg = RING_INSTPM(ring->mmio_base);
+		I915_WRITE(reg,
+			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
+					      INSTPM_SYNC_FLUSH));
+		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
+			     1000))
+			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
+				  ring->name);
+	}
 }
 
 static int
@@ -1021,16 +1027,14 @@
 	gen6_gt_force_wake_get(dev_priv);
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (ring->irq_refcount.gt++ == 0) {
+	if (ring->irq_refcount++ == 0) {
 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
 			I915_WRITE_IMR(ring,
 				       ~(ring->irq_enable_mask |
 					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
 		else
 			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
-		dev_priv->gt_irq_mask &= ~ring->irq_enable_mask;
-		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-		POSTING_READ(GTIMR);
+		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
@@ -1045,15 +1049,13 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, flags);
-	if (--ring->irq_refcount.gt == 0) {
+	if (--ring->irq_refcount == 0) {
 		if (HAS_L3_GPU_CACHE(dev) && ring->id == RCS)
 			I915_WRITE_IMR(ring,
 				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
 		else
 			I915_WRITE_IMR(ring, ~0);
-		dev_priv->gt_irq_mask |= ring->irq_enable_mask;
-		I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
-		POSTING_READ(GTIMR);
+		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask);
 	}
 	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
@@ -1070,14 +1072,12 @@
 	if (!dev->irq_enabled)
 		return false;
 
-	spin_lock_irqsave(&dev_priv->rps.lock, flags);
-	if (ring->irq_refcount.pm++ == 0) {
-		u32 pm_imr = I915_READ(GEN6_PMIMR);
+	spin_lock_irqsave(&dev_priv->irq_lock, flags);
+	if (ring->irq_refcount++ == 0) {
 		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
-		I915_WRITE(GEN6_PMIMR, pm_imr & ~ring->irq_enable_mask);
-		POSTING_READ(GEN6_PMIMR);
+		snb_enable_pm_irq(dev_priv, ring->irq_enable_mask);
 	}
-	spin_unlock_irqrestore(&dev_priv->rps.lock, flags);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 
 	return true;
 }
@@ -1092,14 +1092,12 @@
 	if (!dev->irq_enabled)
 		return;
 
-	spin_lock_irqsave(&dev_priv->rps.lock, flags);
-	if (--ring->irq_refcount.pm == 0) {
-		u32 pm_imr = I915_READ(GEN6_PMIMR);
+	spin_lock_irqsave(&dev_priv->irq_lock, flags);
+	if (--ring->irq_refcount == 0) {
 		I915_WRITE_IMR(ring, ~0);
-		I915_WRITE(GEN6_PMIMR, pm_imr | ring->irq_enable_mask);
-		POSTING_READ(GEN6_PMIMR);
+		snb_disable_pm_irq(dev_priv, ring->irq_enable_mask);
 	}
-	spin_unlock_irqrestore(&dev_priv->rps.lock, flags);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
 }
 
 static int
@@ -1144,7 +1142,7 @@
 		intel_ring_advance(ring);
 	} else {
 		struct drm_i915_gem_object *obj = ring->private;
-		u32 cs_offset = obj->gtt_offset;
+		u32 cs_offset = i915_gem_obj_ggtt_offset(obj);
 
 		if (len > I830_BATCH_LIMIT)
 			return -ENOSPC;
@@ -1224,12 +1222,12 @@
 
 	i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
 
-	ret = i915_gem_object_pin(obj, 4096, true, false);
+	ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
 	if (ret != 0) {
 		goto err_unref;
 	}
 
-	ring->status_page.gfx_addr = obj->gtt_offset;
+	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
 	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
 	if (ring->status_page.page_addr == NULL) {
 		ret = -ENOMEM;
@@ -1307,7 +1305,7 @@
 
 	ring->obj = obj;
 
-	ret = i915_gem_object_pin(obj, PAGE_SIZE, true, false);
+	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
 	if (ret)
 		goto err_unref;
 
@@ -1316,7 +1314,7 @@
 		goto err_unpin;
 
 	ring->virtual_start =
-		ioremap_wc(dev_priv->gtt.mappable_base + obj->gtt_offset,
+		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj),
 			   ring->size);
 	if (ring->virtual_start == NULL) {
 		DRM_ERROR("Failed to map ringbuffer.\n");
@@ -1594,6 +1592,8 @@
 	if (INTEL_INFO(ring->dev)->gen >= 6) {
 		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
 		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
+		if (HAS_VEBOX(ring->dev))
+			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
 	}
 
 	ring->set_seqno(ring, seqno);
@@ -1828,7 +1828,7 @@
 			return -ENOMEM;
 		}
 
-		ret = i915_gem_object_pin(obj, 0, true, false);
+		ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
 		if (ret != 0) {
 			drm_gem_object_unreference(&obj->base);
 			DRM_ERROR("Failed to ping batch bo\n");
@@ -2008,8 +2008,7 @@
 	ring->add_request = gen6_add_request;
 	ring->get_seqno = gen6_ring_get_seqno;
 	ring->set_seqno = ring_set_seqno;
-	ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT |
-		PM_VEBOX_CS_ERROR_INTERRUPT;
+	ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
 	ring->irq_get = hsw_vebox_get_irq;
 	ring->irq_put = hsw_vebox_put_irq;
 	ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 799f04c..432ad53 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -33,11 +33,12 @@
 #define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
 #define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
 
-#define I915_READ_NOPID(ring) I915_READ(RING_NOPID((ring)->mmio_base))
-#define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base))
-#define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base))
-
-enum intel_ring_hangcheck_action { wait, active, kick, hung };
+enum intel_ring_hangcheck_action {
+	HANGCHECK_WAIT,
+	HANGCHECK_ACTIVE,
+	HANGCHECK_KICK,
+	HANGCHECK_HUNG,
+};
 
 struct intel_ring_hangcheck {
 	bool deadlock;
@@ -78,10 +79,7 @@
 	 */
 	u32		last_retired_head;
 
-	struct {
-		u32	gt; /*  protected by dev_priv->irq_lock */
-		u32	pm; /*  protected by dev_priv->rps.lock (sucks) */
-	} irq_refcount;
+	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
 	u32		irq_enable_mask;	/* bitmask to enable ring interrupt */
 	u32		trace_irq_seqno;
 	u32		sync_seqno[I915_NUM_RINGS-1];
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 2628d56..317e058 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -202,15 +202,14 @@
 	u32	cur_dot_crawl,	max_dot_crawl;
 };
 
-static struct intel_sdvo *to_intel_sdvo(struct drm_encoder *encoder)
+static struct intel_sdvo *to_sdvo(struct intel_encoder *encoder)
 {
-	return container_of(encoder, struct intel_sdvo, base.base);
+	return container_of(encoder, struct intel_sdvo, base);
 }
 
 static struct intel_sdvo *intel_attached_sdvo(struct drm_connector *connector)
 {
-	return container_of(intel_attached_encoder(connector),
-			    struct intel_sdvo, base);
+	return to_sdvo(intel_attached_encoder(connector));
 }
 
 static struct intel_sdvo_connector *to_intel_sdvo_connector(struct drm_connector *connector)
@@ -539,7 +538,8 @@
 				  &status))
 		goto log_fail;
 
-	while (status == SDVO_CMD_STATUS_PENDING && --retry) {
+	while ((status == SDVO_CMD_STATUS_PENDING ||
+			status == SDVO_CMD_STATUS_TARGET_NOT_SPECIFIED) && --retry) {
 		if (retry < 10)
 			msleep(15);
 		else
@@ -964,30 +964,32 @@
 static bool intel_sdvo_set_avi_infoframe(struct intel_sdvo *intel_sdvo,
 					 const struct drm_display_mode *adjusted_mode)
 {
-	struct dip_infoframe avi_if = {
-		.type = DIP_TYPE_AVI,
-		.ver = DIP_VERSION_AVI,
-		.len = DIP_LEN_AVI,
-	};
-	uint8_t sdvo_data[4 + sizeof(avi_if.body.avi)];
-	struct intel_crtc *intel_crtc = to_intel_crtc(intel_sdvo->base.base.crtc);
+	uint8_t sdvo_data[HDMI_INFOFRAME_SIZE(AVI)];
+	struct drm_crtc *crtc = intel_sdvo->base.base.crtc;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	union hdmi_infoframe frame;
+	int ret;
+	ssize_t len;
+
+	ret = drm_hdmi_avi_infoframe_from_display_mode(&frame.avi,
+						       adjusted_mode);
+	if (ret < 0) {
+		DRM_ERROR("couldn't fill AVI infoframe\n");
+		return false;
+	}
 
 	if (intel_sdvo->rgb_quant_range_selectable) {
 		if (intel_crtc->config.limited_color_range)
-			avi_if.body.avi.ITC_EC_Q_SC |= DIP_AVI_RGB_QUANT_RANGE_LIMITED;
+			frame.avi.quantization_range =
+				HDMI_QUANTIZATION_RANGE_LIMITED;
 		else
-			avi_if.body.avi.ITC_EC_Q_SC |= DIP_AVI_RGB_QUANT_RANGE_FULL;
+			frame.avi.quantization_range =
+				HDMI_QUANTIZATION_RANGE_FULL;
 	}
 
-	avi_if.body.avi.VIC = drm_match_cea_mode(adjusted_mode);
-
-	intel_dip_infoframe_csum(&avi_if);
-
-	/* sdvo spec says that the ecc is handled by the hw, and it looks like
-	 * we must not send the ecc field, either. */
-	memcpy(sdvo_data, &avi_if, 3);
-	sdvo_data[3] = avi_if.checksum;
-	memcpy(&sdvo_data[4], &avi_if.body, sizeof(avi_if.body.avi));
+	len = hdmi_infoframe_pack(&frame, sdvo_data, sizeof(sdvo_data));
+	if (len < 0)
+		return false;
 
 	return intel_sdvo_write_infoframe(intel_sdvo, SDVO_HBUF_INDEX_AVI_IF,
 					  SDVO_HBUF_TX_VSYNC,
@@ -1084,7 +1086,7 @@
 static bool intel_sdvo_compute_config(struct intel_encoder *encoder,
 				      struct intel_crtc_config *pipe_config)
 {
-	struct intel_sdvo *intel_sdvo = to_intel_sdvo(&encoder->base);
+	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
 	struct drm_display_mode *adjusted_mode = &pipe_config->adjusted_mode;
 	struct drm_display_mode *mode = &pipe_config->requested_mode;
 
@@ -1154,7 +1156,7 @@
 	struct drm_display_mode *adjusted_mode =
 		&intel_crtc->config.adjusted_mode;
 	struct drm_display_mode *mode = &intel_crtc->config.requested_mode;
-	struct intel_sdvo *intel_sdvo = to_intel_sdvo(&intel_encoder->base);
+	struct intel_sdvo *intel_sdvo = to_sdvo(intel_encoder);
 	u32 sdvox;
 	struct intel_sdvo_in_out_map in_out;
 	struct intel_sdvo_dtd input_dtd, output_dtd;
@@ -1292,7 +1294,7 @@
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_sdvo *intel_sdvo = to_intel_sdvo(&encoder->base);
+	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
 	u16 active_outputs = 0;
 	u32 tmp;
 
@@ -1315,7 +1317,7 @@
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_sdvo *intel_sdvo = to_intel_sdvo(&encoder->base);
+	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
 	struct intel_sdvo_dtd dtd;
 	int encoder_pixel_multiplier = 0;
 	u32 flags = 0, sdvox;
@@ -1357,22 +1359,21 @@
 	}
 
 	/* Cross check the port pixel multiplier with the sdvo encoder state. */
-	intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_CLOCK_RATE_MULT, &val, 1);
-	switch (val) {
-	case SDVO_CLOCK_RATE_MULT_1X:
-		encoder_pixel_multiplier = 1;
-		break;
-	case SDVO_CLOCK_RATE_MULT_2X:
-		encoder_pixel_multiplier = 2;
-		break;
-	case SDVO_CLOCK_RATE_MULT_4X:
-		encoder_pixel_multiplier = 4;
-		break;
+	if (intel_sdvo_get_value(intel_sdvo, SDVO_CMD_GET_CLOCK_RATE_MULT,
+				 &val, 1)) {
+		switch (val) {
+		case SDVO_CLOCK_RATE_MULT_1X:
+			encoder_pixel_multiplier = 1;
+			break;
+		case SDVO_CLOCK_RATE_MULT_2X:
+			encoder_pixel_multiplier = 2;
+			break;
+		case SDVO_CLOCK_RATE_MULT_4X:
+			encoder_pixel_multiplier = 4;
+			break;
+		}
 	}
 
-	if(HAS_PCH_SPLIT(dev))
-		return; /* no pixel multiplier readout support yet */
-
 	WARN(encoder_pixel_multiplier != pipe_config->pixel_multiplier,
 	     "SDVO pixel multiplier mismatch, port: %i, encoder: %i\n",
 	     pipe_config->pixel_multiplier, encoder_pixel_multiplier);
@@ -1381,7 +1382,7 @@
 static void intel_disable_sdvo(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
-	struct intel_sdvo *intel_sdvo = to_intel_sdvo(&encoder->base);
+	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
 	u32 temp;
 
 	intel_sdvo_set_active_outputs(intel_sdvo, 0);
@@ -1423,7 +1424,7 @@
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_sdvo *intel_sdvo = to_intel_sdvo(&encoder->base);
+	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
 	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
 	u32 temp;
 	bool input1, input2;
@@ -1584,7 +1585,7 @@
 
 static void intel_sdvo_enable_hotplug(struct intel_encoder *encoder)
 {
-	struct intel_sdvo *intel_sdvo = to_intel_sdvo(&encoder->base);
+	struct intel_sdvo *intel_sdvo = to_sdvo(encoder);
 
 	intel_sdvo_write_cmd(intel_sdvo, SDVO_CMD_SET_ACTIVE_HOT_PLUG,
 			&intel_sdvo->hotplug_active, 2);
@@ -1697,6 +1698,9 @@
 	struct intel_sdvo_connector *intel_sdvo_connector = to_intel_sdvo_connector(connector);
 	enum drm_connector_status ret;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
+		      connector->base.id, drm_get_connector_name(connector));
+
 	if (!intel_sdvo_get_value(intel_sdvo,
 				  SDVO_CMD_GET_ATTACHED_DISPLAYS,
 				  &response, 2))
@@ -2188,7 +2192,7 @@
 
 static void intel_sdvo_enc_destroy(struct drm_encoder *encoder)
 {
-	struct intel_sdvo *intel_sdvo = to_intel_sdvo(encoder);
+	struct intel_sdvo *intel_sdvo = to_sdvo(to_intel_encoder(encoder));
 
 	if (intel_sdvo->sdvo_lvds_fixed_mode != NULL)
 		drm_mode_destroy(encoder->dev,
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 1fa5612..78b621c 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -38,7 +38,8 @@
 #include "i915_drv.h"
 
 static void
-vlv_update_plane(struct drm_plane *dplane, struct drm_framebuffer *fb,
+vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
+		 struct drm_framebuffer *fb,
 		 struct drm_i915_gem_object *obj, int crtc_x, int crtc_y,
 		 unsigned int crtc_w, unsigned int crtc_h,
 		 uint32_t x, uint32_t y,
@@ -108,14 +109,15 @@
 
 	sprctl |= SP_ENABLE;
 
+	intel_update_sprite_watermarks(dplane, crtc, src_w, pixel_size, true,
+				       src_w != crtc_w || src_h != crtc_h);
+
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
 	crtc_w--;
 	crtc_h--;
 
-	intel_update_sprite_watermarks(dev, pipe, crtc_w, pixel_size, true);
-
 	I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]);
 	I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x);
 
@@ -133,13 +135,13 @@
 
 	I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w);
 	I915_WRITE(SPCNTR(pipe, plane), sprctl);
-	I915_MODIFY_DISPBASE(SPSURF(pipe, plane), obj->gtt_offset +
+	I915_MODIFY_DISPBASE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) +
 			     sprsurf_offset);
 	POSTING_READ(SPSURF(pipe, plane));
 }
 
 static void
-vlv_disable_plane(struct drm_plane *dplane)
+vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
 {
 	struct drm_device *dev = dplane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -152,6 +154,8 @@
 	/* Activate double buffered register update */
 	I915_MODIFY_DISPBASE(SPSURF(pipe, plane), 0);
 	POSTING_READ(SPSURF(pipe, plane));
+
+	intel_update_sprite_watermarks(dplane, crtc, 0, 0, false, false);
 }
 
 static int
@@ -206,7 +210,8 @@
 }
 
 static void
-ivb_update_plane(struct drm_plane *plane, struct drm_framebuffer *fb,
+ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
+		 struct drm_framebuffer *fb,
 		 struct drm_i915_gem_object *obj, int crtc_x, int crtc_y,
 		 unsigned int crtc_w, unsigned int crtc_h,
 		 uint32_t x, uint32_t y,
@@ -262,14 +267,15 @@
 	if (IS_HASWELL(dev))
 		sprctl |= SPRITE_PIPE_CSC_ENABLE;
 
+	intel_update_sprite_watermarks(plane, crtc, src_w, pixel_size, true,
+				       src_w != crtc_w || src_h != crtc_h);
+
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
 	crtc_w--;
 	crtc_h--;
 
-	intel_update_sprite_watermarks(dev, pipe, crtc_w, pixel_size, true);
-
 	/*
 	 * IVB workaround: must disable low power watermarks for at least
 	 * one frame before enabling scaling.  LP watermarks can be re-enabled
@@ -308,7 +314,8 @@
 	if (intel_plane->can_scale)
 		I915_WRITE(SPRSCALE(pipe), sprscale);
 	I915_WRITE(SPRCTL(pipe), sprctl);
-	I915_MODIFY_DISPBASE(SPRSURF(pipe), obj->gtt_offset + sprsurf_offset);
+	I915_MODIFY_DISPBASE(SPRSURF(pipe),
+			     i915_gem_obj_ggtt_offset(obj) + sprsurf_offset);
 	POSTING_READ(SPRSURF(pipe));
 
 	/* potentially re-enable LP watermarks */
@@ -317,7 +324,7 @@
 }
 
 static void
-ivb_disable_plane(struct drm_plane *plane)
+ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
 {
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -335,7 +342,7 @@
 
 	dev_priv->sprite_scaling_enabled &= ~(1 << pipe);
 
-	intel_update_sprite_watermarks(dev, pipe, 0, 0, false);
+	intel_update_sprite_watermarks(plane, crtc, 0, 0, false, false);
 
 	/* potentially re-enable LP watermarks */
 	if (scaling_was_enabled && !dev_priv->sprite_scaling_enabled)
@@ -397,7 +404,8 @@
 }
 
 static void
-ilk_update_plane(struct drm_plane *plane, struct drm_framebuffer *fb,
+ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
+		 struct drm_framebuffer *fb,
 		 struct drm_i915_gem_object *obj, int crtc_x, int crtc_y,
 		 unsigned int crtc_w, unsigned int crtc_h,
 		 uint32_t x, uint32_t y,
@@ -449,14 +457,15 @@
 		dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */
 	dvscntr |= DVS_ENABLE;
 
+	intel_update_sprite_watermarks(plane, crtc, src_w, pixel_size, true,
+				       src_w != crtc_w || src_h != crtc_h);
+
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
 	crtc_w--;
 	crtc_h--;
 
-	intel_update_sprite_watermarks(dev, pipe, crtc_w, pixel_size, true);
-
 	dvsscale = 0;
 	if (IS_GEN5(dev) || crtc_w != src_w || crtc_h != src_h)
 		dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h;
@@ -478,12 +487,13 @@
 	I915_WRITE(DVSSIZE(pipe), (crtc_h << 16) | crtc_w);
 	I915_WRITE(DVSSCALE(pipe), dvsscale);
 	I915_WRITE(DVSCNTR(pipe), dvscntr);
-	I915_MODIFY_DISPBASE(DVSSURF(pipe), obj->gtt_offset + dvssurf_offset);
+	I915_MODIFY_DISPBASE(DVSSURF(pipe),
+			     i915_gem_obj_ggtt_offset(obj) + dvssurf_offset);
 	POSTING_READ(DVSSURF(pipe));
 }
 
 static void
-ilk_disable_plane(struct drm_plane *plane)
+ilk_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
 {
 	struct drm_device *dev = plane->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -496,6 +506,8 @@
 	/* Flush double buffered register updates */
 	I915_MODIFY_DISPBASE(DVSSURF(pipe), 0);
 	POSTING_READ(DVSSURF(pipe));
+
+	intel_update_sprite_watermarks(plane, crtc, 0, 0, false, false);
 }
 
 static void
@@ -818,11 +830,11 @@
 		intel_enable_primary(crtc);
 
 	if (visible)
-		intel_plane->update_plane(plane, fb, obj,
+		intel_plane->update_plane(plane, crtc, fb, obj,
 					  crtc_x, crtc_y, crtc_w, crtc_h,
 					  src_x, src_y, src_w, src_h);
 	else
-		intel_plane->disable_plane(plane);
+		intel_plane->disable_plane(plane, crtc);
 
 	if (disable_primary)
 		intel_disable_primary(crtc);
@@ -855,9 +867,14 @@
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 	int ret = 0;
 
-	if (plane->crtc)
-		intel_enable_primary(plane->crtc);
-	intel_plane->disable_plane(plane);
+	if (!plane->fb)
+		return 0;
+
+	if (WARN_ON(!plane->crtc))
+		return -EINVAL;
+
+	intel_enable_primary(plane->crtc);
+	intel_plane->disable_plane(plane, plane->crtc);
 
 	if (!intel_plane->obj)
 		goto out;
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 39debd8..f2c6d79 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -823,16 +823,14 @@
 	},
 };
 
-static struct intel_tv *enc_to_intel_tv(struct drm_encoder *encoder)
+static struct intel_tv *enc_to_tv(struct intel_encoder *encoder)
 {
-	return container_of(encoder, struct intel_tv, base.base);
+	return container_of(encoder, struct intel_tv, base);
 }
 
 static struct intel_tv *intel_attached_tv(struct drm_connector *connector)
 {
-	return container_of(intel_attached_encoder(connector),
-			    struct intel_tv,
-			    base);
+	return enc_to_tv(intel_attached_encoder(connector));
 }
 
 static bool
@@ -908,7 +906,7 @@
 intel_tv_compute_config(struct intel_encoder *encoder,
 			struct intel_crtc_config *pipe_config)
 {
-	struct intel_tv *intel_tv = enc_to_intel_tv(&encoder->base);
+	struct intel_tv *intel_tv = enc_to_tv(encoder);
 	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
 
 	if (!tv_mode)
@@ -921,15 +919,12 @@
 	return true;
 }
 
-static void
-intel_tv_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
-		  struct drm_display_mode *adjusted_mode)
+static void intel_tv_mode_set(struct intel_encoder *encoder)
 {
-	struct drm_device *dev = encoder->dev;
+	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_crtc *crtc = encoder->crtc;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_tv *intel_tv = enc_to_intel_tv(encoder);
+	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
+	struct intel_tv *intel_tv = enc_to_tv(encoder);
 	const struct tv_mode *tv_mode = intel_tv_mode_find(intel_tv);
 	u32 tv_ctl;
 	u32 hctl1, hctl2, hctl3;
@@ -1305,6 +1300,10 @@
 	struct intel_tv *intel_tv = intel_attached_tv(connector);
 	int type;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
+		      connector->base.id, drm_get_connector_name(connector),
+		      force);
+
 	mode = reported_modes[0];
 
 	if (force) {
@@ -1483,10 +1482,6 @@
 	return ret;
 }
 
-static const struct drm_encoder_helper_funcs intel_tv_helper_funcs = {
-	.mode_set = intel_tv_mode_set,
-};
-
 static const struct drm_connector_funcs intel_tv_connector_funcs = {
 	.dpms = intel_connector_dpms,
 	.detect = intel_tv_detect,
@@ -1619,6 +1614,7 @@
 			 DRM_MODE_ENCODER_TVDAC);
 
 	intel_encoder->compute_config = intel_tv_compute_config;
+	intel_encoder->mode_set = intel_tv_mode_set;
 	intel_encoder->enable = intel_enable_tv;
 	intel_encoder->disable = intel_disable_tv;
 	intel_encoder->get_hw_state = intel_tv_get_hw_state;
@@ -1640,7 +1636,6 @@
 
 	intel_tv->tv_format = tv_modes[initial_mode].name;
 
-	drm_encoder_helper_add(&intel_encoder->base, &intel_tv_helper_funcs);
 	drm_connector_helper_add(connector, &intel_tv_connector_helper_funcs);
 	connector->interlace_allowed = false;
 	connector->doublescan_allowed = false;
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
new file mode 100644
index 0000000..8f5bc86
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -0,0 +1,595 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "i915_drv.h"
+#include "intel_drv.h"
+
+#define FORCEWAKE_ACK_TIMEOUT_MS 2
+
+#define __raw_i915_read8(dev_priv__, reg__) readb((dev_priv__)->regs + (reg__))
+#define __raw_i915_write8(dev_priv__, reg__, val__) writeb(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read16(dev_priv__, reg__) readw((dev_priv__)->regs + (reg__))
+#define __raw_i915_write16(dev_priv__, reg__, val__) writew(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read32(dev_priv__, reg__) readl((dev_priv__)->regs + (reg__))
+#define __raw_i915_write32(dev_priv__, reg__, val__) writel(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_i915_read64(dev_priv__, reg__) readq((dev_priv__)->regs + (reg__))
+#define __raw_i915_write64(dev_priv__, reg__, val__) writeq(val__, (dev_priv__)->regs + (reg__))
+
+#define __raw_posting_read(dev_priv__, reg__) (void)__raw_i915_read32(dev_priv__, reg__)
+
+
+static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
+{
+	u32 gt_thread_status_mask;
+
+	if (IS_HASWELL(dev_priv->dev))
+		gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK_HSW;
+	else
+		gt_thread_status_mask = GEN6_GT_THREAD_STATUS_CORE_MASK;
+
+	/* w/a for a sporadic read returning 0 by waiting for the GT
+	 * thread to wake up.
+	 */
+	if (wait_for_atomic_us((__raw_i915_read32(dev_priv, GEN6_GT_THREAD_STATUS_REG) & gt_thread_status_mask) == 0, 500))
+		DRM_ERROR("GT thread status wait timed out\n");
+}
+
+static void __gen6_gt_force_wake_reset(struct drm_i915_private *dev_priv)
+{
+	__raw_i915_write32(dev_priv, FORCEWAKE, 0);
+	/* something from same cacheline, but !FORCEWAKE */
+	__raw_posting_read(dev_priv, ECOBUS);
+}
+
+static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+{
+	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK) & 1) == 0,
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
+
+	__raw_i915_write32(dev_priv, FORCEWAKE, 1);
+	/* something from same cacheline, but !FORCEWAKE */
+	__raw_posting_read(dev_priv, ECOBUS);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK) & 1),
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
+
+	/* WaRsForcewakeWaitTC0:snb */
+	__gen6_gt_wait_for_thread_c0(dev_priv);
+}
+
+static void __gen6_gt_force_wake_mt_reset(struct drm_i915_private *dev_priv)
+{
+	__raw_i915_write32(dev_priv, FORCEWAKE_MT, _MASKED_BIT_DISABLE(0xffff));
+	/* something from same cacheline, but !FORCEWAKE_MT */
+	__raw_posting_read(dev_priv, ECOBUS);
+}
+
+static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv)
+{
+	u32 forcewake_ack;
+
+	if (IS_HASWELL(dev_priv->dev))
+		forcewake_ack = FORCEWAKE_ACK_HSW;
+	else
+		forcewake_ack = FORCEWAKE_MT_ACK;
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv, forcewake_ack) & FORCEWAKE_KERNEL) == 0,
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
+
+	__raw_i915_write32(dev_priv, FORCEWAKE_MT,
+			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
+	/* something from same cacheline, but !FORCEWAKE_MT */
+	__raw_posting_read(dev_priv, ECOBUS);
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv, forcewake_ack) & FORCEWAKE_KERNEL),
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Timed out waiting for forcewake to ack request.\n");
+
+	/* WaRsForcewakeWaitTC0:ivb,hsw */
+	__gen6_gt_wait_for_thread_c0(dev_priv);
+}
+
+static void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv)
+{
+	u32 gtfifodbg;
+
+	gtfifodbg = __raw_i915_read32(dev_priv, GTFIFODBG);
+	if (WARN(gtfifodbg & GT_FIFO_CPU_ERROR_MASK,
+	     "MMIO read or write has been dropped %x\n", gtfifodbg))
+		__raw_i915_write32(dev_priv, GTFIFODBG, GT_FIFO_CPU_ERROR_MASK);
+}
+
+static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+{
+	__raw_i915_write32(dev_priv, FORCEWAKE, 0);
+	/* something from same cacheline, but !FORCEWAKE */
+	__raw_posting_read(dev_priv, ECOBUS);
+	gen6_gt_check_fifodbg(dev_priv);
+}
+
+static void __gen6_gt_force_wake_mt_put(struct drm_i915_private *dev_priv)
+{
+	__raw_i915_write32(dev_priv, FORCEWAKE_MT,
+			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
+	/* something from same cacheline, but !FORCEWAKE_MT */
+	__raw_posting_read(dev_priv, ECOBUS);
+	gen6_gt_check_fifodbg(dev_priv);
+}
+
+static int __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
+{
+	int ret = 0;
+
+	if (dev_priv->uncore.fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
+		int loop = 500;
+		u32 fifo = __raw_i915_read32(dev_priv, GT_FIFO_FREE_ENTRIES);
+		while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
+			udelay(10);
+			fifo = __raw_i915_read32(dev_priv, GT_FIFO_FREE_ENTRIES);
+		}
+		if (WARN_ON(loop < 0 && fifo <= GT_FIFO_NUM_RESERVED_ENTRIES))
+			++ret;
+		dev_priv->uncore.fifo_count = fifo;
+	}
+	dev_priv->uncore.fifo_count--;
+
+	return ret;
+}
+
+static void vlv_force_wake_reset(struct drm_i915_private *dev_priv)
+{
+	__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
+			   _MASKED_BIT_DISABLE(0xffff));
+	/* something from same cacheline, but !FORCEWAKE_VLV */
+	__raw_posting_read(dev_priv, FORCEWAKE_ACK_VLV);
+}
+
+static void vlv_force_wake_get(struct drm_i915_private *dev_priv)
+{
+	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL) == 0,
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Timed out waiting for forcewake old ack to clear.\n");
+
+	__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
+			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
+	__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV,
+			   _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL));
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_VLV) & FORCEWAKE_KERNEL),
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Timed out waiting for GT to ack forcewake request.\n");
+
+	if (wait_for_atomic((__raw_i915_read32(dev_priv, FORCEWAKE_ACK_MEDIA_VLV) &
+			     FORCEWAKE_KERNEL),
+			    FORCEWAKE_ACK_TIMEOUT_MS))
+		DRM_ERROR("Timed out waiting for media to ack forcewake request.\n");
+
+	/* WaRsForcewakeWaitTC0:vlv */
+	__gen6_gt_wait_for_thread_c0(dev_priv);
+}
+
+static void vlv_force_wake_put(struct drm_i915_private *dev_priv)
+{
+	__raw_i915_write32(dev_priv, FORCEWAKE_VLV,
+			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
+	__raw_i915_write32(dev_priv, FORCEWAKE_MEDIA_VLV,
+			   _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL));
+	/* The below doubles as a POSTING_READ */
+	gen6_gt_check_fifodbg(dev_priv);
+}
+
+void intel_uncore_early_sanitize(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (HAS_FPGA_DBG_UNCLAIMED(dev))
+		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+}
+
+void intel_uncore_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (IS_VALLEYVIEW(dev)) {
+		dev_priv->uncore.funcs.force_wake_get = vlv_force_wake_get;
+		dev_priv->uncore.funcs.force_wake_put = vlv_force_wake_put;
+	} else if (IS_HASWELL(dev)) {
+		dev_priv->uncore.funcs.force_wake_get = __gen6_gt_force_wake_mt_get;
+		dev_priv->uncore.funcs.force_wake_put = __gen6_gt_force_wake_mt_put;
+	} else if (IS_IVYBRIDGE(dev)) {
+		u32 ecobus;
+
+		/* IVB configs may use multi-threaded forcewake */
+
+		/* A small trick here - if the bios hasn't configured
+		 * MT forcewake, and if the device is in RC6, then
+		 * force_wake_mt_get will not wake the device and the
+		 * ECOBUS read will return zero. Which will be
+		 * (correctly) interpreted by the test below as MT
+		 * forcewake being disabled.
+		 */
+		mutex_lock(&dev->struct_mutex);
+		__gen6_gt_force_wake_mt_get(dev_priv);
+		ecobus = __raw_i915_read32(dev_priv, ECOBUS);
+		__gen6_gt_force_wake_mt_put(dev_priv);
+		mutex_unlock(&dev->struct_mutex);
+
+		if (ecobus & FORCEWAKE_MT_ENABLE) {
+			dev_priv->uncore.funcs.force_wake_get =
+				__gen6_gt_force_wake_mt_get;
+			dev_priv->uncore.funcs.force_wake_put =
+				__gen6_gt_force_wake_mt_put;
+		} else {
+			DRM_INFO("No MT forcewake available on Ivybridge, this can result in issues\n");
+			DRM_INFO("when using vblank-synced partial screen updates.\n");
+			dev_priv->uncore.funcs.force_wake_get =
+				__gen6_gt_force_wake_get;
+			dev_priv->uncore.funcs.force_wake_put =
+				__gen6_gt_force_wake_put;
+		}
+	} else if (IS_GEN6(dev)) {
+		dev_priv->uncore.funcs.force_wake_get =
+			__gen6_gt_force_wake_get;
+		dev_priv->uncore.funcs.force_wake_put =
+			__gen6_gt_force_wake_put;
+	}
+}
+
+void intel_uncore_sanitize(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (IS_VALLEYVIEW(dev)) {
+		vlv_force_wake_reset(dev_priv);
+	} else if (INTEL_INFO(dev)->gen >= 6) {
+		__gen6_gt_force_wake_reset(dev_priv);
+		if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev))
+			__gen6_gt_force_wake_mt_reset(dev_priv);
+	}
+
+	/* BIOS often leaves RC6 enabled, but disable it for hw init */
+	intel_disable_gt_powersave(dev);
+}
+
+/*
+ * Generally this is called implicitly by the register read function. However,
+ * if some sequence requires the GT to not power down then this function should
+ * be called at the beginning of the sequence followed by a call to
+ * gen6_gt_force_wake_put() at the end of the sequence.
+ */
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	if (dev_priv->uncore.forcewake_count++ == 0)
+		dev_priv->uncore.funcs.force_wake_get(dev_priv);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+}
+
+/*
+ * see gen6_gt_force_wake_get()
+ */
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+	if (--dev_priv->uncore.forcewake_count == 0)
+		dev_priv->uncore.funcs.force_wake_put(dev_priv);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+}
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	((HAS_FORCE_WAKE((dev_priv)->dev)) && \
+	 ((reg) < 0x40000) &&            \
+	 ((reg) != FORCEWAKE))
+
+static void
+ilk_dummy_write(struct drm_i915_private *dev_priv)
+{
+	/* WaIssueDummyWriteToWakeupFromRC6:ilk Issue a dummy write to wake up
+	 * the chip from rc6 before touching it for real. MI_MODE is masked,
+	 * hence harmless to write 0 into. */
+	__raw_i915_write32(dev_priv, MI_MODE, 0);
+}
+
+static void
+hsw_unclaimed_reg_clear(struct drm_i915_private *dev_priv, u32 reg)
+{
+	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) &&
+	    (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
+		DRM_ERROR("Unknown unclaimed register before writing to %x\n",
+			  reg);
+		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	}
+}
+
+static void
+hsw_unclaimed_reg_check(struct drm_i915_private *dev_priv, u32 reg)
+{
+	if (HAS_FPGA_DBG_UNCLAIMED(dev_priv->dev) &&
+	    (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
+		DRM_ERROR("Unclaimed write to %x\n", reg);
+		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	}
+}
+
+#define __i915_read(x) \
+u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg, bool trace) { \
+	unsigned long irqflags; \
+	u##x val = 0; \
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
+	if (dev_priv->info->gen == 5) \
+		ilk_dummy_write(dev_priv); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		if (dev_priv->uncore.forcewake_count == 0) \
+			dev_priv->uncore.funcs.force_wake_get(dev_priv); \
+		val = __raw_i915_read##x(dev_priv, reg); \
+		if (dev_priv->uncore.forcewake_count == 0) \
+			dev_priv->uncore.funcs.force_wake_put(dev_priv); \
+	} else { \
+		val = __raw_i915_read##x(dev_priv, reg); \
+	} \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
+	trace_i915_reg_rw(false, reg, val, sizeof(val), trace); \
+	return val; \
+}
+
+__i915_read(8)
+__i915_read(16)
+__i915_read(32)
+__i915_read(64)
+#undef __i915_read
+
+#define __i915_write(x) \
+void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val, bool trace) { \
+	unsigned long irqflags; \
+	u32 __fifo_ret = 0; \
+	trace_i915_reg_rw(true, reg, val, sizeof(val), trace); \
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__fifo_ret = __gen6_gt_wait_for_fifo(dev_priv); \
+	} \
+	if (dev_priv->info->gen == 5) \
+		ilk_dummy_write(dev_priv); \
+	hsw_unclaimed_reg_clear(dev_priv, reg); \
+	__raw_i915_write##x(dev_priv, reg, val); \
+	if (unlikely(__fifo_ret)) { \
+		gen6_gt_check_fifodbg(dev_priv); \
+	} \
+	hsw_unclaimed_reg_check(dev_priv, reg); \
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \
+}
+__i915_write(8)
+__i915_write(16)
+__i915_write(32)
+__i915_write(64)
+#undef __i915_write
+
+static const struct register_whitelist {
+	uint64_t offset;
+	uint32_t size;
+	uint32_t gen_bitmask; /* support gens, 0x10 for 4, 0x30 for 4 and 5, etc. */
+} whitelist[] = {
+	{ RING_TIMESTAMP(RENDER_RING_BASE), 8, 0xF0 },
+};
+
+int i915_reg_read_ioctl(struct drm_device *dev,
+			void *data, struct drm_file *file)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_reg_read *reg = data;
+	struct register_whitelist const *entry = whitelist;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(whitelist); i++, entry++) {
+		if (entry->offset == reg->offset &&
+		    (1 << INTEL_INFO(dev)->gen & entry->gen_bitmask))
+			break;
+	}
+
+	if (i == ARRAY_SIZE(whitelist))
+		return -EINVAL;
+
+	switch (entry->size) {
+	case 8:
+		reg->val = I915_READ64(reg->offset);
+		break;
+	case 4:
+		reg->val = I915_READ(reg->offset);
+		break;
+	case 2:
+		reg->val = I915_READ16(reg->offset);
+		break;
+	case 1:
+		reg->val = I915_READ8(reg->offset);
+		break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int i8xx_do_reset(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (IS_I85X(dev))
+		return -ENODEV;
+
+	I915_WRITE(D_STATE, I915_READ(D_STATE) | DSTATE_GFX_RESET_I830);
+	POSTING_READ(D_STATE);
+
+	if (IS_I830(dev) || IS_845G(dev)) {
+		I915_WRITE(DEBUG_RESET_I830,
+			   DEBUG_RESET_DISPLAY |
+			   DEBUG_RESET_RENDER |
+			   DEBUG_RESET_FULL);
+		POSTING_READ(DEBUG_RESET_I830);
+		msleep(1);
+
+		I915_WRITE(DEBUG_RESET_I830, 0);
+		POSTING_READ(DEBUG_RESET_I830);
+	}
+
+	msleep(1);
+
+	I915_WRITE(D_STATE, I915_READ(D_STATE) & ~DSTATE_GFX_RESET_I830);
+	POSTING_READ(D_STATE);
+
+	return 0;
+}
+
+static int i965_reset_complete(struct drm_device *dev)
+{
+	u8 gdrst;
+	pci_read_config_byte(dev->pdev, I965_GDRST, &gdrst);
+	return (gdrst & GRDOM_RESET_ENABLE) == 0;
+}
+
+static int i965_do_reset(struct drm_device *dev)
+{
+	int ret;
+
+	/*
+	 * Set the domains we want to reset (GRDOM/bits 2 and 3) as
+	 * well as the reset bit (GR/bit 0).  Setting the GR bit
+	 * triggers the reset; when done, the hardware will clear it.
+	 */
+	pci_write_config_byte(dev->pdev, I965_GDRST,
+			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
+	ret =  wait_for(i965_reset_complete(dev), 500);
+	if (ret)
+		return ret;
+
+	/* We can't reset render&media without also resetting display ... */
+	pci_write_config_byte(dev->pdev, I965_GDRST,
+			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
+
+	ret =  wait_for(i965_reset_complete(dev), 500);
+	if (ret)
+		return ret;
+
+	pci_write_config_byte(dev->pdev, I965_GDRST, 0);
+
+	return 0;
+}
+
+static int ironlake_do_reset(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 gdrst;
+	int ret;
+
+	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
+	gdrst &= ~GRDOM_MASK;
+	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
+		   gdrst | GRDOM_RENDER | GRDOM_RESET_ENABLE);
+	ret = wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
+	if (ret)
+		return ret;
+
+	/* We can't reset render&media without also resetting display ... */
+	gdrst = I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR);
+	gdrst &= ~GRDOM_MASK;
+	I915_WRITE(MCHBAR_MIRROR_BASE + ILK_GDSR,
+		   gdrst | GRDOM_MEDIA | GRDOM_RESET_ENABLE);
+	return wait_for(I915_READ(MCHBAR_MIRROR_BASE + ILK_GDSR) & 0x1, 500);
+}
+
+static int gen6_do_reset(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int	ret;
+	unsigned long irqflags;
+
+	/* Hold uncore.lock across reset to prevent any register access
+	 * with forcewake not set correctly
+	 */
+	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
+
+	/* Reset the chip */
+
+	/* GEN6_GDRST is not in the gt power well, no need to check
+	 * for fifo space for the write or forcewake the chip for
+	 * the read
+	 */
+	__raw_i915_write32(dev_priv, GEN6_GDRST, GEN6_GRDOM_FULL);
+
+	/* Spin waiting for the device to ack the reset request */
+	ret = wait_for((__raw_i915_read32(dev_priv, GEN6_GDRST) & GEN6_GRDOM_FULL) == 0, 500);
+
+	/* If reset with a user forcewake, try to restore, otherwise turn it off */
+	if (dev_priv->uncore.forcewake_count)
+		dev_priv->uncore.funcs.force_wake_get(dev_priv);
+	else
+		dev_priv->uncore.funcs.force_wake_put(dev_priv);
+
+	/* Restore fifo count */
+	dev_priv->uncore.fifo_count = __raw_i915_read32(dev_priv, GT_FIFO_FREE_ENTRIES);
+
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
+	return ret;
+}
+
+int intel_gpu_reset(struct drm_device *dev)
+{
+	switch (INTEL_INFO(dev)->gen) {
+	case 7:
+	case 6: return gen6_do_reset(dev);
+	case 5: return ironlake_do_reset(dev);
+	case 4: return i965_do_reset(dev);
+	case 2: return i8xx_do_reset(dev);
+	default: return -ENODEV;
+	}
+}
+
+void intel_uncore_clear_errors(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/* XXX needs spinlock around caller's grouping */
+	if (HAS_FPGA_DBG_UNCLAIMED(dev))
+		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+}
+
+void intel_uncore_check_errors(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (HAS_FPGA_DBG_UNCLAIMED(dev) &&
+	    (__raw_i915_read32(dev_priv, FPGA_DBG) & FPGA_DBG_RM_NOCLAIM)) {
+		DRM_ERROR("Unclaimed register before interrupt\n");
+		__raw_i915_write32(dev_priv, FPGA_DBG, FPGA_DBG_RM_NOCLAIM);
+	}
+}
diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c
index 17d0a63..6b1a87c 100644
--- a/drivers/gpu/drm/mga/mga_drv.c
+++ b/drivers/gpu/drm/mga/mga_drv.c
@@ -50,7 +50,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = mga_compat_ioctl,
 #endif
@@ -59,7 +58,7 @@
 
 static struct drm_driver driver = {
 	.driver_features =
-	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA |
+	    DRIVER_USE_AGP | DRIVER_PCI_DMA |
 	    DRIVER_HAVE_DMA | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED,
 	.dev_priv_size = sizeof(drm_mga_buf_priv_t),
 	.load = mga_driver_load,
diff --git a/drivers/gpu/drm/mga/mga_drv.h b/drivers/gpu/drm/mga/mga_drv.h
index 54558a0..ca4bc54 100644
--- a/drivers/gpu/drm/mga/mga_drv.h
+++ b/drivers/gpu/drm/mga/mga_drv.h
@@ -149,7 +149,7 @@
 	unsigned int agp_size;
 } drm_mga_private_t;
 
-extern struct drm_ioctl_desc mga_ioctls[];
+extern const struct drm_ioctl_desc mga_ioctls[];
 extern int mga_max_ioctl;
 
 				/* mga_dma.c */
diff --git a/drivers/gpu/drm/mga/mga_state.c b/drivers/gpu/drm/mga/mga_state.c
index 9c14514..37cc2fb 100644
--- a/drivers/gpu/drm/mga/mga_state.c
+++ b/drivers/gpu/drm/mga/mga_state.c
@@ -1083,7 +1083,7 @@
 	return 0;
 }
 
-struct drm_ioctl_desc mga_ioctls[] = {
+const struct drm_ioctl_desc mga_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(MGA_INIT, mga_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(MGA_FLUSH, mga_dma_flush, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(MGA_RESET, mga_dma_reset, DRM_AUTH),
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c
index 122b571c..fcce7b2 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -81,7 +81,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = mgag200_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -89,7 +88,7 @@
 };
 
 static struct drm_driver driver = {
-	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_USE_MTRR,
+	.driver_features = DRIVER_GEM | DRIVER_MODESET,
 	.load = mgag200_driver_load,
 	.unload = mgag200_driver_unload,
 	.fops = &mgag200_driver_fops,
@@ -104,7 +103,7 @@
 	.gem_free_object = mgag200_gem_free_object,
 	.dumb_create = mgag200_dumb_create,
 	.dumb_map_offset = mgag200_dumb_mmap_offset,
-	.dumb_destroy = mgag200_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 };
 
 static struct pci_driver mgag200_pci_driver = {
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 12e2499..baaae193 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -264,9 +264,6 @@
 int mgag200_dumb_create(struct drm_file *file,
 			struct drm_device *dev,
 			struct drm_mode_create_dumb *args);
-int mgag200_dumb_destroy(struct drm_file *file,
-			 struct drm_device *dev,
-			 uint32_t handle);
 void mgag200_gem_free_object(struct drm_gem_object *obj);
 int
 mgag200_dumb_mmap_offset(struct drm_file *file,
diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index 9fa5685..0f8b861 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c
@@ -310,13 +310,6 @@
 	return 0;
 }
 
-int mgag200_dumb_destroy(struct drm_file *file,
-		     struct drm_device *dev,
-		     uint32_t handle)
-{
-	return drm_gem_handle_delete(file, handle);
-}
-
 int mgag200_gem_init_object(struct drm_gem_object *obj)
 {
 	BUG();
@@ -349,7 +342,7 @@
 
 static inline u64 mgag200_bo_mmap_offset(struct mgag200_bo *bo)
 {
-	return bo->bo.addr_space_offset;
+	return drm_vma_node_offset_addr(&bo->bo.vma_node);
 }
 
 int
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index 251784a..503a414 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -29,6 +29,7 @@
 	struct mga_crtc *mga_crtc = to_mga_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct mga_device *mdev = dev->dev_private;
+	struct drm_framebuffer *fb = crtc->fb;
 	int i;
 
 	if (!crtc->enabled)
@@ -36,6 +37,28 @@
 
 	WREG8(DAC_INDEX + MGA1064_INDEX, 0);
 
+	if (fb && fb->bits_per_pixel == 16) {
+		int inc = (fb->depth == 15) ? 8 : 4;
+		u8 r, b;
+		for (i = 0; i < MGAG200_LUT_SIZE; i += inc) {
+			if (fb->depth == 16) {
+				if (i > (MGAG200_LUT_SIZE >> 1)) {
+					r = b = 0;
+				} else {
+					r = mga_crtc->lut_r[i << 1];
+					b = mga_crtc->lut_b[i << 1];
+				}
+			} else {
+				r = mga_crtc->lut_r[i];
+				b = mga_crtc->lut_b[i];
+			}
+			/* VGA registers */
+			WREG8(DAC_INDEX + MGA1064_COL_PAL, r);
+			WREG8(DAC_INDEX + MGA1064_COL_PAL, mga_crtc->lut_g[i]);
+			WREG8(DAC_INDEX + MGA1064_COL_PAL, b);
+		}
+		return;
+	}
 	for (i = 0; i < MGAG200_LUT_SIZE; i++) {
 		/* VGA registers */
 		WREG8(DAC_INDEX + MGA1064_COL_PAL, mga_crtc->lut_r[i]);
@@ -877,7 +900,7 @@
 
 	pitch = crtc->fb->pitches[0] / (crtc->fb->bits_per_pixel / 8);
 	if (crtc->fb->bits_per_pixel == 24)
-		pitch = pitch >> (4 - bppshift);
+		pitch = (pitch * 3) >> (4 - bppshift);
 	else
 		pitch = pitch >> (4 - bppshift);
 
@@ -1251,6 +1274,24 @@
 	kfree(mga_crtc);
 }
 
+static void mga_crtc_disable(struct drm_crtc *crtc)
+{
+	int ret;
+	DRM_DEBUG_KMS("\n");
+	mga_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+	if (crtc->fb) {
+		struct mga_framebuffer *mga_fb = to_mga_framebuffer(crtc->fb);
+		struct drm_gem_object *obj = mga_fb->obj;
+		struct mgag200_bo *bo = gem_to_mga_bo(obj);
+		ret = mgag200_bo_reserve(bo, false);
+		if (ret)
+			return;
+		mgag200_bo_push_sysram(bo);
+		mgag200_bo_unreserve(bo);
+	}
+	crtc->fb = NULL;
+}
+
 /* These provide the minimum set of functions required to handle a CRTC */
 static const struct drm_crtc_funcs mga_crtc_funcs = {
 	.cursor_set = mga_crtc_cursor_set,
@@ -1261,6 +1302,7 @@
 };
 
 static const struct drm_crtc_helper_funcs mga_helper_funcs = {
+	.disable = mga_crtc_disable,
 	.dpms = mga_crtc_dpms,
 	.mode_fixup = mga_crtc_mode_fixup,
 	.mode_set = mga_crtc_mode_set,
@@ -1581,6 +1623,8 @@
 
 	drm_connector_helper_add(connector, &mga_vga_connector_helper_funcs);
 
+	drm_sysfs_connector_add(connector);
+
 	mga_connector->i2c = mgag200_i2c_create(dev);
 	if (!mga_connector->i2c)
 		DRM_ERROR("failed to add ddc bus\n");
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 3acb2b0..07b192f 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -148,7 +148,9 @@
 
 static int mgag200_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	return 0;
+	struct mgag200_bo *mgabo = mgag200_bo(bo);
+
+	return drm_vma_node_verify_access(&mgabo->gem.vma_node, filp);
 }
 
 static int mgag200_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
@@ -321,8 +323,8 @@
 		return ret;
 	}
 
-	mgabo->gem.driver_private = NULL;
 	mgabo->bo.bdev = &mdev->ttm.bdev;
+	mgabo->bo.bdev->dev_mapping = dev->dev_mapping;
 
 	mgag200_ttm_placement(mgabo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);
 
@@ -353,6 +355,7 @@
 		bo->pin_count++;
 		if (gpu_addr)
 			*gpu_addr = mgag200_bo_gpu_offset(bo);
+		return 0;
 	}
 
 	mgag200_ttm_placement(bo, pl_flag);
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
new file mode 100644
index 0000000..a06c19c
--- /dev/null
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -0,0 +1,34 @@
+
+config DRM_MSM
+	tristate "MSM DRM"
+	depends on DRM
+	depends on ARCH_MSM
+	depends on ARCH_MSM8960
+	select DRM_KMS_HELPER
+	select SHMEM
+	select TMPFS
+	default y
+	help
+	  DRM/KMS driver for MSM/snapdragon.
+
+config DRM_MSM_FBDEV
+	bool "Enable legacy fbdev support for MSM modesetting driver"
+	depends on DRM_MSM
+	select FB_SYS_FILLRECT
+	select FB_SYS_COPYAREA
+	select FB_SYS_IMAGEBLIT
+	select FB_SYS_FOPS
+	default y
+	help
+	  Choose this option if you have a need for the legacy fbdev
+	  support. Note that this support also provide the linux console
+	  support on top of the MSM modesetting driver.
+
+config DRM_MSM_REGISTER_LOGGING
+	bool "MSM DRM register logging"
+	depends on DRM_MSM
+	default n
+	help
+	  Compile in support for logging register reads/writes in a format
+	  that can be parsed by envytools demsm tool.  If enabled, register
+	  logging can be switched on via msm.reglog=y module param.
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
new file mode 100644
index 0000000..e179148
--- /dev/null
+++ b/drivers/gpu/drm/msm/Makefile
@@ -0,0 +1,30 @@
+ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/msm
+ifeq (, $(findstring -W,$(EXTRA_CFLAGS)))
+	ccflags-y += -Werror
+endif
+
+msm-y := \
+	adreno/adreno_gpu.o \
+	adreno/a3xx_gpu.o \
+	hdmi/hdmi.o \
+	hdmi/hdmi_bridge.o \
+	hdmi/hdmi_connector.o \
+	hdmi/hdmi_i2c.o \
+	hdmi/hdmi_phy_8960.o \
+	hdmi/hdmi_phy_8x60.o \
+	mdp4/mdp4_crtc.o \
+	mdp4/mdp4_dtv_encoder.o \
+	mdp4/mdp4_format.o \
+	mdp4/mdp4_irq.o \
+	mdp4/mdp4_kms.o \
+	mdp4/mdp4_plane.o \
+	msm_drv.o \
+	msm_fb.o \
+	msm_gem.o \
+	msm_gem_submit.o \
+	msm_gpu.o \
+	msm_ringbuffer.o
+
+msm-$(CONFIG_DRM_MSM_FBDEV) += msm_fbdev.o
+
+obj-$(CONFIG_DRM_MSM)	+= msm.o
diff --git a/drivers/gpu/drm/msm/NOTES b/drivers/gpu/drm/msm/NOTES
new file mode 100644
index 0000000..e036f6c1
--- /dev/null
+++ b/drivers/gpu/drm/msm/NOTES
@@ -0,0 +1,69 @@
+NOTES about msm drm/kms driver:
+
+In the current snapdragon SoC's, we have (at least) 3 different
+display controller blocks at play:
+ + MDP3 - ?? seems to be what is on geeksphone peak device
+ + MDP4 - S3 (APQ8060, touchpad), S4-pro (APQ8064, nexus4 & ifc6410)
+ + MDSS - snapdragon 800
+
+(I don't have a completely clear picture on which display controller
+maps to which part #)
+
+Plus a handful of blocks around them for HDMI/DSI/etc output.
+
+And on gpu side of things:
+ + zero, one, or two 2d cores (z180)
+ + and either a2xx or a3xx 3d core.
+
+But, HDMI/DSI/etc blocks seem like they can be shared across multiple
+display controller blocks.  And I for sure don't want to have to deal
+with N different kms devices from xf86-video-freedreno.  Plus, it
+seems like we can do some clever tricks like use GPU to trigger
+pageflip after rendering completes (ie. have the kms/crtc code build
+up gpu cmdstream to update scanout and write FLUSH register after).
+
+So, the approach is one drm driver, with some modularity.  Different
+'struct msm_kms' implementations, depending on display controller.
+And one or more 'struct msm_gpu' for the various different gpu sub-
+modules.
+
+(Second part is not implemented yet.  So far this is just basic KMS
+driver, and not exposing any custom ioctls to userspace for now.)
+
+The kms module provides the plane, crtc, and encoder objects, and
+loads whatever connectors are appropriate.
+
+For MDP4, the mapping is:
+
+  plane   -> PIPE{RGBn,VGn}              \
+  crtc    -> OVLP{n} + DMA{P,S,E} (??)   |-> MDP "device"
+  encoder -> DTV/LCDC/DSI (within MDP4)  /
+  connector -> HDMI/DSI/etc              --> other device(s)
+
+Since the irq's that drm core mostly cares about are vblank/framedone,
+we'll let msm_mdp4_kms provide the irq install/uninstall/etc functions
+and treat the MDP4 block's irq as "the" irq.  Even though the connectors
+may have their own irqs which they install themselves.  For this reason
+the display controller is the "master" device.
+
+Each connector probably ends up being a separate device, just for the
+logistics of finding/mapping io region, irq, etc.  Idealy we would
+have a better way than just stashing the platform device in a global
+(ie. like DT super-node.. but I don't have any snapdragon hw yet that
+is using DT).
+
+Note that so far I've not been able to get any docs on the hw, and it
+seems that access to such docs would prevent me from working on the
+freedreno gallium driver.  So there may be some mistakes in register
+names (I had to invent a few, since no sufficient hint was given in
+the downstream android fbdev driver), bitfield sizes, etc.  My current
+state of understanding the registers is given in the envytools rnndb
+files at:
+
+  https://github.com/freedreno/envytools/tree/master/rnndb
+  (the mdp4/hdmi/dsi directories)
+
+These files are used both for a parser tool (in the same tree) to
+parse logged register reads/writes (both from downstream android fbdev
+driver, and this driver with register logging enabled), as well as to
+generate the register level headers.
diff --git a/drivers/gpu/drm/msm/adreno/a2xx.xml.h b/drivers/gpu/drm/msm/adreno/a2xx.xml.h
new file mode 100644
index 0000000..3546386
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a2xx.xml.h
@@ -0,0 +1,1438 @@
+#ifndef A2XX_XML
+#define A2XX_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml              (    327 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml           (  30005 bytes, from 2013-07-19 21:30:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   8983 bytes, from 2013-07-24 01:38:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml           (  51415 bytes, from 2013-08-03 14:26:05)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum a2xx_rb_dither_type {
+	DITHER_PIXEL = 0,
+	DITHER_SUBPIXEL = 1,
+};
+
+enum a2xx_colorformatx {
+	COLORX_4_4_4_4 = 0,
+	COLORX_1_5_5_5 = 1,
+	COLORX_5_6_5 = 2,
+	COLORX_8 = 3,
+	COLORX_8_8 = 4,
+	COLORX_8_8_8_8 = 5,
+	COLORX_S8_8_8_8 = 6,
+	COLORX_16_FLOAT = 7,
+	COLORX_16_16_FLOAT = 8,
+	COLORX_16_16_16_16_FLOAT = 9,
+	COLORX_32_FLOAT = 10,
+	COLORX_32_32_FLOAT = 11,
+	COLORX_32_32_32_32_FLOAT = 12,
+	COLORX_2_3_3 = 13,
+	COLORX_8_8_8 = 14,
+};
+
+enum a2xx_sq_surfaceformat {
+	FMT_1_REVERSE = 0,
+	FMT_1 = 1,
+	FMT_8 = 2,
+	FMT_1_5_5_5 = 3,
+	FMT_5_6_5 = 4,
+	FMT_6_5_5 = 5,
+	FMT_8_8_8_8 = 6,
+	FMT_2_10_10_10 = 7,
+	FMT_8_A = 8,
+	FMT_8_B = 9,
+	FMT_8_8 = 10,
+	FMT_Cr_Y1_Cb_Y0 = 11,
+	FMT_Y1_Cr_Y0_Cb = 12,
+	FMT_5_5_5_1 = 13,
+	FMT_8_8_8_8_A = 14,
+	FMT_4_4_4_4 = 15,
+	FMT_10_11_11 = 16,
+	FMT_11_11_10 = 17,
+	FMT_DXT1 = 18,
+	FMT_DXT2_3 = 19,
+	FMT_DXT4_5 = 20,
+	FMT_24_8 = 22,
+	FMT_24_8_FLOAT = 23,
+	FMT_16 = 24,
+	FMT_16_16 = 25,
+	FMT_16_16_16_16 = 26,
+	FMT_16_EXPAND = 27,
+	FMT_16_16_EXPAND = 28,
+	FMT_16_16_16_16_EXPAND = 29,
+	FMT_16_FLOAT = 30,
+	FMT_16_16_FLOAT = 31,
+	FMT_16_16_16_16_FLOAT = 32,
+	FMT_32 = 33,
+	FMT_32_32 = 34,
+	FMT_32_32_32_32 = 35,
+	FMT_32_FLOAT = 36,
+	FMT_32_32_FLOAT = 37,
+	FMT_32_32_32_32_FLOAT = 38,
+	FMT_32_AS_8 = 39,
+	FMT_32_AS_8_8 = 40,
+	FMT_16_MPEG = 41,
+	FMT_16_16_MPEG = 42,
+	FMT_8_INTERLACED = 43,
+	FMT_32_AS_8_INTERLACED = 44,
+	FMT_32_AS_8_8_INTERLACED = 45,
+	FMT_16_INTERLACED = 46,
+	FMT_16_MPEG_INTERLACED = 47,
+	FMT_16_16_MPEG_INTERLACED = 48,
+	FMT_DXN = 49,
+	FMT_8_8_8_8_AS_16_16_16_16 = 50,
+	FMT_DXT1_AS_16_16_16_16 = 51,
+	FMT_DXT2_3_AS_16_16_16_16 = 52,
+	FMT_DXT4_5_AS_16_16_16_16 = 53,
+	FMT_2_10_10_10_AS_16_16_16_16 = 54,
+	FMT_10_11_11_AS_16_16_16_16 = 55,
+	FMT_11_11_10_AS_16_16_16_16 = 56,
+	FMT_32_32_32_FLOAT = 57,
+	FMT_DXT3A = 58,
+	FMT_DXT5A = 59,
+	FMT_CTX1 = 60,
+	FMT_DXT3A_AS_1_1_1_1 = 61,
+};
+
+enum a2xx_sq_ps_vtx_mode {
+	POSITION_1_VECTOR = 0,
+	POSITION_2_VECTORS_UNUSED = 1,
+	POSITION_2_VECTORS_SPRITE = 2,
+	POSITION_2_VECTORS_EDGE = 3,
+	POSITION_2_VECTORS_KILL = 4,
+	POSITION_2_VECTORS_SPRITE_KILL = 5,
+	POSITION_2_VECTORS_EDGE_KILL = 6,
+	MULTIPASS = 7,
+};
+
+enum a2xx_sq_sample_cntl {
+	CENTROIDS_ONLY = 0,
+	CENTERS_ONLY = 1,
+	CENTROIDS_AND_CENTERS = 2,
+};
+
+enum a2xx_dx_clip_space {
+	DXCLIP_OPENGL = 0,
+	DXCLIP_DIRECTX = 1,
+};
+
+enum a2xx_pa_su_sc_polymode {
+	POLY_DISABLED = 0,
+	POLY_DUALMODE = 1,
+};
+
+enum a2xx_rb_edram_mode {
+	EDRAM_NOP = 0,
+	COLOR_DEPTH = 4,
+	DEPTH_ONLY = 5,
+	EDRAM_COPY = 6,
+};
+
+enum a2xx_pa_sc_pattern_bit_order {
+	LITTLE = 0,
+	BIG = 1,
+};
+
+enum a2xx_pa_sc_auto_reset_cntl {
+	NEVER = 0,
+	EACH_PRIMITIVE = 1,
+	EACH_PACKET = 2,
+};
+
+enum a2xx_pa_pixcenter {
+	PIXCENTER_D3D = 0,
+	PIXCENTER_OGL = 1,
+};
+
+enum a2xx_pa_roundmode {
+	TRUNCATE = 0,
+	ROUND = 1,
+	ROUNDTOEVEN = 2,
+	ROUNDTOODD = 3,
+};
+
+enum a2xx_pa_quantmode {
+	ONE_SIXTEENTH = 0,
+	ONE_EIGTH = 1,
+	ONE_QUARTER = 2,
+	ONE_HALF = 3,
+	ONE = 4,
+};
+
+enum a2xx_rb_copy_sample_select {
+	SAMPLE_0 = 0,
+	SAMPLE_1 = 1,
+	SAMPLE_2 = 2,
+	SAMPLE_3 = 3,
+	SAMPLE_01 = 4,
+	SAMPLE_23 = 5,
+	SAMPLE_0123 = 6,
+};
+
+enum sq_tex_clamp {
+	SQ_TEX_WRAP = 0,
+	SQ_TEX_MIRROR = 1,
+	SQ_TEX_CLAMP_LAST_TEXEL = 2,
+	SQ_TEX_MIRROR_ONCE_LAST_TEXEL = 3,
+	SQ_TEX_CLAMP_HALF_BORDER = 4,
+	SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5,
+	SQ_TEX_CLAMP_BORDER = 6,
+	SQ_TEX_MIRROR_ONCE_BORDER = 7,
+};
+
+enum sq_tex_swiz {
+	SQ_TEX_X = 0,
+	SQ_TEX_Y = 1,
+	SQ_TEX_Z = 2,
+	SQ_TEX_W = 3,
+	SQ_TEX_ZERO = 4,
+	SQ_TEX_ONE = 5,
+};
+
+enum sq_tex_filter {
+	SQ_TEX_FILTER_POINT = 0,
+	SQ_TEX_FILTER_BILINEAR = 1,
+	SQ_TEX_FILTER_BICUBIC = 2,
+};
+
+#define REG_A2XX_RBBM_PATCH_RELEASE				0x00000001
+
+#define REG_A2XX_RBBM_CNTL					0x0000003b
+
+#define REG_A2XX_RBBM_SOFT_RESET				0x0000003c
+
+#define REG_A2XX_CP_PFP_UCODE_ADDR				0x000000c0
+
+#define REG_A2XX_CP_PFP_UCODE_DATA				0x000000c1
+
+#define REG_A2XX_RBBM_PERFCOUNTER1_SELECT			0x00000395
+
+#define REG_A2XX_RBBM_PERFCOUNTER1_LO				0x00000397
+
+#define REG_A2XX_RBBM_PERFCOUNTER1_HI				0x00000398
+
+#define REG_A2XX_RBBM_DEBUG					0x0000039b
+
+#define REG_A2XX_RBBM_PM_OVERRIDE1				0x0000039c
+
+#define REG_A2XX_RBBM_PM_OVERRIDE2				0x0000039d
+
+#define REG_A2XX_RBBM_DEBUG_OUT					0x000003a0
+
+#define REG_A2XX_RBBM_DEBUG_CNTL				0x000003a1
+
+#define REG_A2XX_RBBM_READ_ERROR				0x000003b3
+
+#define REG_A2XX_RBBM_INT_CNTL					0x000003b4
+
+#define REG_A2XX_RBBM_INT_STATUS				0x000003b5
+
+#define REG_A2XX_RBBM_INT_ACK					0x000003b6
+
+#define REG_A2XX_MASTER_INT_SIGNAL				0x000003b7
+
+#define REG_A2XX_RBBM_PERIPHID1					0x000003f9
+
+#define REG_A2XX_RBBM_PERIPHID2					0x000003fa
+
+#define REG_A2XX_CP_PERFMON_CNTL				0x00000444
+
+#define REG_A2XX_CP_PERFCOUNTER_SELECT				0x00000445
+
+#define REG_A2XX_CP_PERFCOUNTER_LO				0x00000446
+
+#define REG_A2XX_CP_PERFCOUNTER_HI				0x00000447
+
+#define REG_A2XX_CP_ST_BASE					0x0000044d
+
+#define REG_A2XX_CP_ST_BUFSZ					0x0000044e
+
+#define REG_A2XX_CP_IB1_BASE					0x00000458
+
+#define REG_A2XX_CP_IB1_BUFSZ					0x00000459
+
+#define REG_A2XX_CP_IB2_BASE					0x0000045a
+
+#define REG_A2XX_CP_IB2_BUFSZ					0x0000045b
+
+#define REG_A2XX_CP_STAT					0x0000047f
+
+#define REG_A2XX_RBBM_STATUS					0x000005d0
+#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK			0x0000001f
+#define A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT			0
+static inline uint32_t A2XX_RBBM_STATUS_CMDFIFO_AVAIL(uint32_t val)
+{
+	return ((val) << A2XX_RBBM_STATUS_CMDFIFO_AVAIL__SHIFT) & A2XX_RBBM_STATUS_CMDFIFO_AVAIL__MASK;
+}
+#define A2XX_RBBM_STATUS_TC_BUSY				0x00000020
+#define A2XX_RBBM_STATUS_HIRQ_PENDING				0x00000100
+#define A2XX_RBBM_STATUS_CPRQ_PENDING				0x00000200
+#define A2XX_RBBM_STATUS_CFRQ_PENDING				0x00000400
+#define A2XX_RBBM_STATUS_PFRQ_PENDING				0x00000800
+#define A2XX_RBBM_STATUS_VGT_BUSY_NO_DMA			0x00001000
+#define A2XX_RBBM_STATUS_RBBM_WU_BUSY				0x00004000
+#define A2XX_RBBM_STATUS_CP_NRT_BUSY				0x00010000
+#define A2XX_RBBM_STATUS_MH_BUSY				0x00040000
+#define A2XX_RBBM_STATUS_MH_COHERENCY_BUSY			0x00080000
+#define A2XX_RBBM_STATUS_SX_BUSY				0x00200000
+#define A2XX_RBBM_STATUS_TPC_BUSY				0x00400000
+#define A2XX_RBBM_STATUS_SC_CNTX_BUSY				0x01000000
+#define A2XX_RBBM_STATUS_PA_BUSY				0x02000000
+#define A2XX_RBBM_STATUS_VGT_BUSY				0x04000000
+#define A2XX_RBBM_STATUS_SQ_CNTX17_BUSY				0x08000000
+#define A2XX_RBBM_STATUS_SQ_CNTX0_BUSY				0x10000000
+#define A2XX_RBBM_STATUS_RB_CNTX_BUSY				0x40000000
+#define A2XX_RBBM_STATUS_GUI_ACTIVE				0x80000000
+
+#define REG_A2XX_A220_VSC_BIN_SIZE				0x00000c01
+#define A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK			0x0000001f
+#define A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT			0
+static inline uint32_t A2XX_A220_VSC_BIN_SIZE_WIDTH(uint32_t val)
+{
+	return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_WIDTH__SHIFT) & A2XX_A220_VSC_BIN_SIZE_WIDTH__MASK;
+}
+#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK			0x000003e0
+#define A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT			5
+static inline uint32_t A2XX_A220_VSC_BIN_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val >> 5) << A2XX_A220_VSC_BIN_SIZE_HEIGHT__SHIFT) & A2XX_A220_VSC_BIN_SIZE_HEIGHT__MASK;
+}
+
+static inline uint32_t REG_A2XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
+
+static inline uint32_t REG_A2XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
+
+static inline uint32_t REG_A2XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; }
+
+static inline uint32_t REG_A2XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; }
+
+#define REG_A2XX_PC_DEBUG_CNTL					0x00000c38
+
+#define REG_A2XX_PC_DEBUG_DATA					0x00000c39
+
+#define REG_A2XX_PA_SC_VIZ_QUERY_STATUS				0x00000c44
+
+#define REG_A2XX_GRAS_DEBUG_CNTL				0x00000c80
+
+#define REG_A2XX_PA_SU_DEBUG_CNTL				0x00000c80
+
+#define REG_A2XX_GRAS_DEBUG_DATA				0x00000c81
+
+#define REG_A2XX_PA_SU_DEBUG_DATA				0x00000c81
+
+#define REG_A2XX_PA_SU_FACE_DATA				0x00000c86
+
+#define REG_A2XX_SQ_GPR_MANAGEMENT				0x00000d00
+
+#define REG_A2XX_SQ_FLOW_CONTROL				0x00000d01
+
+#define REG_A2XX_SQ_INST_STORE_MANAGMENT			0x00000d02
+
+#define REG_A2XX_SQ_DEBUG_MISC					0x00000d05
+
+#define REG_A2XX_SQ_INT_CNTL					0x00000d34
+
+#define REG_A2XX_SQ_INT_STATUS					0x00000d35
+
+#define REG_A2XX_SQ_INT_ACK					0x00000d36
+
+#define REG_A2XX_SQ_DEBUG_INPUT_FSM				0x00000dae
+
+#define REG_A2XX_SQ_DEBUG_CONST_MGR_FSM				0x00000daf
+
+#define REG_A2XX_SQ_DEBUG_TP_FSM				0x00000db0
+
+#define REG_A2XX_SQ_DEBUG_FSM_ALU_0				0x00000db1
+
+#define REG_A2XX_SQ_DEBUG_FSM_ALU_1				0x00000db2
+
+#define REG_A2XX_SQ_DEBUG_EXP_ALLOC				0x00000db3
+
+#define REG_A2XX_SQ_DEBUG_PTR_BUFF				0x00000db4
+
+#define REG_A2XX_SQ_DEBUG_GPR_VTX				0x00000db5
+
+#define REG_A2XX_SQ_DEBUG_GPR_PIX				0x00000db6
+
+#define REG_A2XX_SQ_DEBUG_TB_STATUS_SEL				0x00000db7
+
+#define REG_A2XX_SQ_DEBUG_VTX_TB_0				0x00000db8
+
+#define REG_A2XX_SQ_DEBUG_VTX_TB_1				0x00000db9
+
+#define REG_A2XX_SQ_DEBUG_VTX_TB_STATUS_REG			0x00000dba
+
+#define REG_A2XX_SQ_DEBUG_VTX_TB_STATE_MEM			0x00000dbb
+
+#define REG_A2XX_SQ_DEBUG_PIX_TB_0				0x00000dbc
+
+#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_0			0x00000dbd
+
+#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_1			0x00000dbe
+
+#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_2			0x00000dbf
+
+#define REG_A2XX_SQ_DEBUG_PIX_TB_STATUS_REG_3			0x00000dc0
+
+#define REG_A2XX_SQ_DEBUG_PIX_TB_STATE_MEM			0x00000dc1
+
+#define REG_A2XX_TC_CNTL_STATUS					0x00000e00
+#define A2XX_TC_CNTL_STATUS_L2_INVALIDATE			0x00000001
+
+#define REG_A2XX_TP0_CHICKEN					0x00000e1e
+
+#define REG_A2XX_RB_BC_CONTROL					0x00000f01
+#define A2XX_RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE		0x00000001
+#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK		0x00000006
+#define A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT		1
+static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(uint32_t val)
+{
+	return ((val) << A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT__MASK;
+}
+#define A2XX_RB_BC_CONTROL_DISABLE_EDRAM_CAM			0x00000008
+#define A2XX_RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH	0x00000010
+#define A2XX_RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP		0x00000020
+#define A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP		0x00000040
+#define A2XX_RB_BC_CONTROL_ENABLE_AZ_THROTTLE			0x00000080
+#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK		0x00001f00
+#define A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT		8
+static inline uint32_t A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT(uint32_t val)
+{
+	return ((val) << A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__SHIFT) & A2XX_RB_BC_CONTROL_AZ_THROTTLE_COUNT__MASK;
+}
+#define A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE			0x00004000
+#define A2XX_RB_BC_CONTROL_CRC_MODE				0x00008000
+#define A2XX_RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS		0x00010000
+#define A2XX_RB_BC_CONTROL_DISABLE_ACCUM			0x00020000
+#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK		0x003c0000
+#define A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT		18
+static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK(uint32_t val)
+{
+	return ((val) << A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_ALLOC_MASK__MASK;
+}
+#define A2XX_RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE		0x00400000
+#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK		0x07800000
+#define A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT		23
+static inline uint32_t A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(uint32_t val)
+{
+	return ((val) << A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__SHIFT) & A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT__MASK;
+}
+#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK	0x18000000
+#define A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT	27
+static inline uint32_t A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(uint32_t val)
+{
+	return ((val) << A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__SHIFT) & A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT__MASK;
+}
+#define A2XX_RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE	0x20000000
+#define A2XX_RB_BC_CONTROL_CRC_SYSTEM				0x40000000
+#define A2XX_RB_BC_CONTROL_RESERVED6				0x80000000
+
+#define REG_A2XX_RB_EDRAM_INFO					0x00000f02
+
+#define REG_A2XX_RB_DEBUG_CNTL					0x00000f26
+
+#define REG_A2XX_RB_DEBUG_DATA					0x00000f27
+
+#define REG_A2XX_RB_SURFACE_INFO				0x00002000
+
+#define REG_A2XX_RB_COLOR_INFO					0x00002001
+#define A2XX_RB_COLOR_INFO_FORMAT__MASK				0x0000000f
+#define A2XX_RB_COLOR_INFO_FORMAT__SHIFT			0
+static inline uint32_t A2XX_RB_COLOR_INFO_FORMAT(enum a2xx_colorformatx val)
+{
+	return ((val) << A2XX_RB_COLOR_INFO_FORMAT__SHIFT) & A2XX_RB_COLOR_INFO_FORMAT__MASK;
+}
+#define A2XX_RB_COLOR_INFO_ROUND_MODE__MASK			0x00000030
+#define A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT			4
+static inline uint32_t A2XX_RB_COLOR_INFO_ROUND_MODE(uint32_t val)
+{
+	return ((val) << A2XX_RB_COLOR_INFO_ROUND_MODE__SHIFT) & A2XX_RB_COLOR_INFO_ROUND_MODE__MASK;
+}
+#define A2XX_RB_COLOR_INFO_LINEAR				0x00000040
+#define A2XX_RB_COLOR_INFO_ENDIAN__MASK				0x00000180
+#define A2XX_RB_COLOR_INFO_ENDIAN__SHIFT			7
+static inline uint32_t A2XX_RB_COLOR_INFO_ENDIAN(uint32_t val)
+{
+	return ((val) << A2XX_RB_COLOR_INFO_ENDIAN__SHIFT) & A2XX_RB_COLOR_INFO_ENDIAN__MASK;
+}
+#define A2XX_RB_COLOR_INFO_SWAP__MASK				0x00000600
+#define A2XX_RB_COLOR_INFO_SWAP__SHIFT				9
+static inline uint32_t A2XX_RB_COLOR_INFO_SWAP(uint32_t val)
+{
+	return ((val) << A2XX_RB_COLOR_INFO_SWAP__SHIFT) & A2XX_RB_COLOR_INFO_SWAP__MASK;
+}
+#define A2XX_RB_COLOR_INFO_BASE__MASK				0xfffff000
+#define A2XX_RB_COLOR_INFO_BASE__SHIFT				12
+static inline uint32_t A2XX_RB_COLOR_INFO_BASE(uint32_t val)
+{
+	return ((val >> 10) << A2XX_RB_COLOR_INFO_BASE__SHIFT) & A2XX_RB_COLOR_INFO_BASE__MASK;
+}
+
+#define REG_A2XX_RB_DEPTH_INFO					0x00002002
+#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK			0x00000001
+#define A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT			0
+static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val)
+{
+	return ((val) << A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK;
+}
+#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK			0xfffff000
+#define A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT			12
+static inline uint32_t A2XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val)
+{
+	return ((val >> 10) << A2XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A2XX_RB_DEPTH_INFO_DEPTH_BASE__MASK;
+}
+
+#define REG_A2XX_A225_RB_COLOR_INFO3				0x00002005
+
+#define REG_A2XX_COHER_DEST_BASE_0				0x00002006
+
+#define REG_A2XX_PA_SC_SCREEN_SCISSOR_TL			0x0000200e
+#define A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE	0x80000000
+#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK			0x00007fff
+#define A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT			0
+static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_X(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_X__MASK;
+}
+#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK			0x7fff0000
+#define A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT			16
+static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A2XX_PA_SC_SCREEN_SCISSOR_BR			0x0000200f
+#define A2XX_PA_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE	0x80000000
+#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK			0x00007fff
+#define A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT			0
+static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_X(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_X__MASK;
+}
+#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK			0x7fff0000
+#define A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT			16
+static inline uint32_t A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_SCREEN_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A2XX_PA_SC_WINDOW_OFFSET				0x00002080
+#define A2XX_PA_SC_WINDOW_OFFSET_X__MASK			0x00007fff
+#define A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT			0
+static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_X(int32_t val)
+{
+	return ((val) << A2XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_X__MASK;
+}
+#define A2XX_PA_SC_WINDOW_OFFSET_Y__MASK			0x7fff0000
+#define A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT			16
+static inline uint32_t A2XX_PA_SC_WINDOW_OFFSET_Y(int32_t val)
+{
+	return ((val) << A2XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A2XX_PA_SC_WINDOW_OFFSET_Y__MASK;
+}
+#define A2XX_PA_SC_WINDOW_OFFSET_DISABLE			0x80000000
+
+#define REG_A2XX_PA_SC_WINDOW_SCISSOR_TL			0x00002081
+#define A2XX_PA_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE	0x80000000
+#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK			0x00007fff
+#define A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT			0
+static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_X(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_X__MASK;
+}
+#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK			0x7fff0000
+#define A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT			16
+static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_TL_Y(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A2XX_PA_SC_WINDOW_SCISSOR_BR			0x00002082
+#define A2XX_PA_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE	0x80000000
+#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK			0x00007fff
+#define A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT			0
+static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_X(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_X__MASK;
+}
+#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK			0x7fff0000
+#define A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT			16
+static inline uint32_t A2XX_PA_SC_WINDOW_SCISSOR_BR_Y(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A2XX_PA_SC_WINDOW_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A2XX_UNKNOWN_2010					0x00002010
+
+#define REG_A2XX_VGT_MAX_VTX_INDX				0x00002100
+
+#define REG_A2XX_VGT_MIN_VTX_INDX				0x00002101
+
+#define REG_A2XX_VGT_INDX_OFFSET				0x00002102
+
+#define REG_A2XX_A225_PC_MULTI_PRIM_IB_RESET_INDX		0x00002103
+
+#define REG_A2XX_RB_COLOR_MASK					0x00002104
+#define A2XX_RB_COLOR_MASK_WRITE_RED				0x00000001
+#define A2XX_RB_COLOR_MASK_WRITE_GREEN				0x00000002
+#define A2XX_RB_COLOR_MASK_WRITE_BLUE				0x00000004
+#define A2XX_RB_COLOR_MASK_WRITE_ALPHA				0x00000008
+
+#define REG_A2XX_RB_BLEND_RED					0x00002105
+
+#define REG_A2XX_RB_BLEND_GREEN					0x00002106
+
+#define REG_A2XX_RB_BLEND_BLUE					0x00002107
+
+#define REG_A2XX_RB_BLEND_ALPHA					0x00002108
+
+#define REG_A2XX_RB_FOG_COLOR					0x00002109
+
+#define REG_A2XX_RB_STENCILREFMASK_BF				0x0000210c
+#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK		0x000000ff
+#define A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT		0
+static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val)
+{
+	return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILREF__MASK;
+}
+#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK		0x0000ff00
+#define A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT		8
+static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val)
+{
+	return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK;
+}
+#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK	0x00ff0000
+#define A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT	16
+static inline uint32_t A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val)
+{
+	return ((val) << A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK;
+}
+
+#define REG_A2XX_RB_STENCILREFMASK				0x0000210d
+#define A2XX_RB_STENCILREFMASK_STENCILREF__MASK			0x000000ff
+#define A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT		0
+static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILREF(uint32_t val)
+{
+	return ((val) << A2XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILREF__MASK;
+}
+#define A2XX_RB_STENCILREFMASK_STENCILMASK__MASK		0x0000ff00
+#define A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT		8
+static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val)
+{
+	return ((val) << A2XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILMASK__MASK;
+}
+#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK		0x00ff0000
+#define A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT		16
+static inline uint32_t A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val)
+{
+	return ((val) << A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A2XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK;
+}
+
+#define REG_A2XX_RB_ALPHA_REF					0x0000210e
+
+#define REG_A2XX_PA_CL_VPORT_XSCALE				0x0000210f
+#define A2XX_PA_CL_VPORT_XSCALE__MASK				0xffffffff
+#define A2XX_PA_CL_VPORT_XSCALE__SHIFT				0
+static inline uint32_t A2XX_PA_CL_VPORT_XSCALE(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_VPORT_XSCALE__SHIFT) & A2XX_PA_CL_VPORT_XSCALE__MASK;
+}
+
+#define REG_A2XX_PA_CL_VPORT_XOFFSET				0x00002110
+#define A2XX_PA_CL_VPORT_XOFFSET__MASK				0xffffffff
+#define A2XX_PA_CL_VPORT_XOFFSET__SHIFT				0
+static inline uint32_t A2XX_PA_CL_VPORT_XOFFSET(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_VPORT_XOFFSET__SHIFT) & A2XX_PA_CL_VPORT_XOFFSET__MASK;
+}
+
+#define REG_A2XX_PA_CL_VPORT_YSCALE				0x00002111
+#define A2XX_PA_CL_VPORT_YSCALE__MASK				0xffffffff
+#define A2XX_PA_CL_VPORT_YSCALE__SHIFT				0
+static inline uint32_t A2XX_PA_CL_VPORT_YSCALE(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_VPORT_YSCALE__SHIFT) & A2XX_PA_CL_VPORT_YSCALE__MASK;
+}
+
+#define REG_A2XX_PA_CL_VPORT_YOFFSET				0x00002112
+#define A2XX_PA_CL_VPORT_YOFFSET__MASK				0xffffffff
+#define A2XX_PA_CL_VPORT_YOFFSET__SHIFT				0
+static inline uint32_t A2XX_PA_CL_VPORT_YOFFSET(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_VPORT_YOFFSET__SHIFT) & A2XX_PA_CL_VPORT_YOFFSET__MASK;
+}
+
+#define REG_A2XX_PA_CL_VPORT_ZSCALE				0x00002113
+#define A2XX_PA_CL_VPORT_ZSCALE__MASK				0xffffffff
+#define A2XX_PA_CL_VPORT_ZSCALE__SHIFT				0
+static inline uint32_t A2XX_PA_CL_VPORT_ZSCALE(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_VPORT_ZSCALE__SHIFT) & A2XX_PA_CL_VPORT_ZSCALE__MASK;
+}
+
+#define REG_A2XX_PA_CL_VPORT_ZOFFSET				0x00002114
+#define A2XX_PA_CL_VPORT_ZOFFSET__MASK				0xffffffff
+#define A2XX_PA_CL_VPORT_ZOFFSET__SHIFT				0
+static inline uint32_t A2XX_PA_CL_VPORT_ZOFFSET(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_VPORT_ZOFFSET__SHIFT) & A2XX_PA_CL_VPORT_ZOFFSET__MASK;
+}
+
+#define REG_A2XX_SQ_PROGRAM_CNTL				0x00002180
+#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK			0x000000ff
+#define A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT			0
+static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_REGS(uint32_t val)
+{
+	return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_REGS__MASK;
+}
+#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK			0x0000ff00
+#define A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT			8
+static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_REGS(uint32_t val)
+{
+	return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_REGS__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_REGS__MASK;
+}
+#define A2XX_SQ_PROGRAM_CNTL_VS_RESOURCE			0x00010000
+#define A2XX_SQ_PROGRAM_CNTL_PS_RESOURCE			0x00020000
+#define A2XX_SQ_PROGRAM_CNTL_PARAM_GEN				0x00040000
+#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_PIX			0x00080000
+#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK		0x00f00000
+#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT		20
+static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(uint32_t val)
+{
+	return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_COUNT__MASK;
+}
+#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK		0x07000000
+#define A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT		24
+static inline uint32_t A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE(enum a2xx_sq_ps_vtx_mode val)
+{
+	return ((val) << A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_VS_EXPORT_MODE__MASK;
+}
+#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK		0x78000000
+#define A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT		27
+static inline uint32_t A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE(uint32_t val)
+{
+	return ((val) << A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__SHIFT) & A2XX_SQ_PROGRAM_CNTL_PS_EXPORT_MODE__MASK;
+}
+#define A2XX_SQ_PROGRAM_CNTL_GEN_INDEX_VTX			0x80000000
+
+#define REG_A2XX_SQ_CONTEXT_MISC				0x00002181
+#define A2XX_SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE			0x00000001
+#define A2XX_SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY		0x00000002
+#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK		0x0000000c
+#define A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT		2
+static inline uint32_t A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum a2xx_sq_sample_cntl val)
+{
+	return ((val) << A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__SHIFT) & A2XX_SQ_CONTEXT_MISC_SC_SAMPLE_CNTL__MASK;
+}
+#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK		0x0000ff00
+#define A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT		8
+static inline uint32_t A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS(uint32_t val)
+{
+	return ((val) << A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__SHIFT) & A2XX_SQ_CONTEXT_MISC_PARAM_GEN_POS__MASK;
+}
+#define A2XX_SQ_CONTEXT_MISC_PERFCOUNTER_REF			0x00010000
+#define A2XX_SQ_CONTEXT_MISC_YEILD_OPTIMIZE			0x00020000
+#define A2XX_SQ_CONTEXT_MISC_TX_CACHE_SEL			0x00040000
+
+#define REG_A2XX_SQ_INTERPOLATOR_CNTL				0x00002182
+
+#define REG_A2XX_SQ_WRAPPING_0					0x00002183
+
+#define REG_A2XX_SQ_WRAPPING_1					0x00002184
+
+#define REG_A2XX_SQ_PS_PROGRAM					0x000021f6
+
+#define REG_A2XX_SQ_VS_PROGRAM					0x000021f7
+
+#define REG_A2XX_RB_DEPTHCONTROL				0x00002200
+#define A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE			0x00000001
+#define A2XX_RB_DEPTHCONTROL_Z_ENABLE				0x00000002
+#define A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE			0x00000004
+#define A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE			0x00000008
+#define A2XX_RB_DEPTHCONTROL_ZFUNC__MASK			0x00000070
+#define A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT			4
+static inline uint32_t A2XX_RB_DEPTHCONTROL_ZFUNC(enum adreno_compare_func val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_ZFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_ZFUNC__MASK;
+}
+#define A2XX_RB_DEPTHCONTROL_BACKFACE_ENABLE			0x00000080
+#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK			0x00000700
+#define A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT			8
+static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC(enum adreno_compare_func val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC__MASK;
+}
+#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK			0x00003800
+#define A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT			11
+static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL(enum adreno_stencil_op val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL__MASK;
+}
+#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK			0x0001c000
+#define A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT		14
+static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS(enum adreno_stencil_op val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS__MASK;
+}
+#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK			0x000e0000
+#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT		17
+static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL(enum adreno_stencil_op val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL__MASK;
+}
+#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK		0x00700000
+#define A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT		20
+static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF(enum adreno_compare_func val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFUNC_BF__MASK;
+}
+#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK		0x03800000
+#define A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT		23
+static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF(enum adreno_stencil_op val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILFAIL_BF__MASK;
+}
+#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK		0x1c000000
+#define A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT		26
+static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF(enum adreno_stencil_op val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZPASS_BF__MASK;
+}
+#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK		0xe0000000
+#define A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT		29
+static inline uint32_t A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF(enum adreno_stencil_op val)
+{
+	return ((val) << A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__SHIFT) & A2XX_RB_DEPTHCONTROL_STENCILZFAIL_BF__MASK;
+}
+
+#define REG_A2XX_RB_BLEND_CONTROL				0x00002201
+#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK		0x0000001f
+#define A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT		0
+static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(enum adreno_rb_blend_factor val)
+{
+	return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND__MASK;
+}
+#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK		0x000000e0
+#define A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT		5
+static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(enum adreno_rb_blend_opcode val)
+{
+	return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN__MASK;
+}
+#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK		0x00001f00
+#define A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT		8
+static inline uint32_t A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(enum adreno_rb_blend_factor val)
+{
+	return ((val) << A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND__MASK;
+}
+#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK		0x001f0000
+#define A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT		16
+static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(enum adreno_rb_blend_factor val)
+{
+	return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND__MASK;
+}
+#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK		0x00e00000
+#define A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT		21
+static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(enum adreno_rb_blend_opcode val)
+{
+	return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN__MASK;
+}
+#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK		0x1f000000
+#define A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT		24
+static inline uint32_t A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(enum adreno_rb_blend_factor val)
+{
+	return ((val) << A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__SHIFT) & A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND__MASK;
+}
+#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE_ENABLE		0x20000000
+#define A2XX_RB_BLEND_CONTROL_BLEND_FORCE			0x40000000
+
+#define REG_A2XX_RB_COLORCONTROL				0x00002202
+#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK			0x00000007
+#define A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT			0
+static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_FUNC(enum adreno_compare_func val)
+{
+	return ((val) << A2XX_RB_COLORCONTROL_ALPHA_FUNC__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_FUNC__MASK;
+}
+#define A2XX_RB_COLORCONTROL_ALPHA_TEST_ENABLE			0x00000008
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE		0x00000010
+#define A2XX_RB_COLORCONTROL_BLEND_DISABLE			0x00000020
+#define A2XX_RB_COLORCONTROL_VOB_ENABLE				0x00000040
+#define A2XX_RB_COLORCONTROL_VS_EXPORTS_FOG			0x00000080
+#define A2XX_RB_COLORCONTROL_ROP_CODE__MASK			0x00000f00
+#define A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT			8
+static inline uint32_t A2XX_RB_COLORCONTROL_ROP_CODE(uint32_t val)
+{
+	return ((val) << A2XX_RB_COLORCONTROL_ROP_CODE__SHIFT) & A2XX_RB_COLORCONTROL_ROP_CODE__MASK;
+}
+#define A2XX_RB_COLORCONTROL_DITHER_MODE__MASK			0x00003000
+#define A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT			12
+static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_MODE(enum adreno_rb_dither_mode val)
+{
+	return ((val) << A2XX_RB_COLORCONTROL_DITHER_MODE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_MODE__MASK;
+}
+#define A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK			0x0000c000
+#define A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT			14
+static inline uint32_t A2XX_RB_COLORCONTROL_DITHER_TYPE(enum a2xx_rb_dither_type val)
+{
+	return ((val) << A2XX_RB_COLORCONTROL_DITHER_TYPE__SHIFT) & A2XX_RB_COLORCONTROL_DITHER_TYPE__MASK;
+}
+#define A2XX_RB_COLORCONTROL_PIXEL_FOG				0x00010000
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK	0x03000000
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT	24
+static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(uint32_t val)
+{
+	return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0__MASK;
+}
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK	0x0c000000
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT	26
+static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(uint32_t val)
+{
+	return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1__MASK;
+}
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK	0x30000000
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT	28
+static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(uint32_t val)
+{
+	return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2__MASK;
+}
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK	0xc0000000
+#define A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT	30
+static inline uint32_t A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(uint32_t val)
+{
+	return ((val) << A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__SHIFT) & A2XX_RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3__MASK;
+}
+
+#define REG_A2XX_VGT_CURRENT_BIN_ID_MAX				0x00002203
+#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK		0x00000007
+#define A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT		0
+static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN(uint32_t val)
+{
+	return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_COLUMN__MASK;
+}
+#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK			0x00000038
+#define A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT			3
+static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_ROW(uint32_t val)
+{
+	return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_ROW__MASK;
+}
+#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK	0x000001c0
+#define A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT	6
+static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK(uint32_t val)
+{
+	return ((val) << A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MAX_GUARD_BAND_MASK__MASK;
+}
+
+#define REG_A2XX_PA_CL_CLIP_CNTL				0x00002204
+#define A2XX_PA_CL_CLIP_CNTL_CLIP_DISABLE			0x00010000
+#define A2XX_PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA		0x00040000
+#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK		0x00080000
+#define A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT		19
+static inline uint32_t A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum a2xx_dx_clip_space val)
+{
+	return ((val) << A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__SHIFT) & A2XX_PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF__MASK;
+}
+#define A2XX_PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT		0x00100000
+#define A2XX_PA_CL_CLIP_CNTL_VTX_KILL_OR			0x00200000
+#define A2XX_PA_CL_CLIP_CNTL_XY_NAN_RETAIN			0x00400000
+#define A2XX_PA_CL_CLIP_CNTL_Z_NAN_RETAIN			0x00800000
+#define A2XX_PA_CL_CLIP_CNTL_W_NAN_RETAIN			0x01000000
+
+#define REG_A2XX_PA_SU_SC_MODE_CNTL				0x00002205
+#define A2XX_PA_SU_SC_MODE_CNTL_CULL_FRONT			0x00000001
+#define A2XX_PA_SU_SC_MODE_CNTL_CULL_BACK			0x00000002
+#define A2XX_PA_SU_SC_MODE_CNTL_FACE				0x00000004
+#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK			0x00000018
+#define A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT			3
+static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_POLYMODE(enum a2xx_pa_su_sc_polymode val)
+{
+	return ((val) << A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_POLYMODE__MASK;
+}
+#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK		0x000000e0
+#define A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT		5
+static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+	return ((val) << A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE__MASK;
+}
+#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK		0x00000700
+#define A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT		8
+static inline uint32_t A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+	return ((val) << A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__SHIFT) & A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE__MASK;
+}
+#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE	0x00000800
+#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE		0x00001000
+#define A2XX_PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE		0x00002000
+#define A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE			0x00008000
+#define A2XX_PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE	0x00010000
+#define A2XX_PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE		0x00040000
+#define A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST		0x00080000
+#define A2XX_PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS			0x00100000
+#define A2XX_PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA		0x00200000
+#define A2XX_PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE		0x00800000
+#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI		0x02000000
+#define A2XX_PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE	0x04000000
+#define A2XX_PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS		0x10000000
+#define A2XX_PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS		0x20000000
+#define A2XX_PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE		0x40000000
+#define A2XX_PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE		0x80000000
+
+#define REG_A2XX_PA_CL_VTE_CNTL					0x00002206
+#define A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA			0x00000001
+#define A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA			0x00000002
+#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA			0x00000004
+#define A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA			0x00000008
+#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA			0x00000010
+#define A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA			0x00000020
+#define A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT				0x00000100
+#define A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT				0x00000200
+#define A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT				0x00000400
+#define A2XX_PA_CL_VTE_CNTL_PERFCOUNTER_REF			0x00000800
+
+#define REG_A2XX_VGT_CURRENT_BIN_ID_MIN				0x00002207
+#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK		0x00000007
+#define A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT		0
+static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN(uint32_t val)
+{
+	return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_COLUMN__MASK;
+}
+#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK			0x00000038
+#define A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT			3
+static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_ROW(uint32_t val)
+{
+	return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_ROW__MASK;
+}
+#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK	0x000001c0
+#define A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT	6
+static inline uint32_t A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK(uint32_t val)
+{
+	return ((val) << A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__SHIFT) & A2XX_VGT_CURRENT_BIN_ID_MIN_GUARD_BAND_MASK__MASK;
+}
+
+#define REG_A2XX_RB_MODECONTROL					0x00002208
+#define A2XX_RB_MODECONTROL_EDRAM_MODE__MASK			0x00000007
+#define A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT			0
+static inline uint32_t A2XX_RB_MODECONTROL_EDRAM_MODE(enum a2xx_rb_edram_mode val)
+{
+	return ((val) << A2XX_RB_MODECONTROL_EDRAM_MODE__SHIFT) & A2XX_RB_MODECONTROL_EDRAM_MODE__MASK;
+}
+
+#define REG_A2XX_A220_RB_LRZ_VSC_CONTROL			0x00002209
+
+#define REG_A2XX_RB_SAMPLE_POS					0x0000220a
+
+#define REG_A2XX_CLEAR_COLOR					0x0000220b
+#define A2XX_CLEAR_COLOR_RED__MASK				0x000000ff
+#define A2XX_CLEAR_COLOR_RED__SHIFT				0
+static inline uint32_t A2XX_CLEAR_COLOR_RED(uint32_t val)
+{
+	return ((val) << A2XX_CLEAR_COLOR_RED__SHIFT) & A2XX_CLEAR_COLOR_RED__MASK;
+}
+#define A2XX_CLEAR_COLOR_GREEN__MASK				0x0000ff00
+#define A2XX_CLEAR_COLOR_GREEN__SHIFT				8
+static inline uint32_t A2XX_CLEAR_COLOR_GREEN(uint32_t val)
+{
+	return ((val) << A2XX_CLEAR_COLOR_GREEN__SHIFT) & A2XX_CLEAR_COLOR_GREEN__MASK;
+}
+#define A2XX_CLEAR_COLOR_BLUE__MASK				0x00ff0000
+#define A2XX_CLEAR_COLOR_BLUE__SHIFT				16
+static inline uint32_t A2XX_CLEAR_COLOR_BLUE(uint32_t val)
+{
+	return ((val) << A2XX_CLEAR_COLOR_BLUE__SHIFT) & A2XX_CLEAR_COLOR_BLUE__MASK;
+}
+#define A2XX_CLEAR_COLOR_ALPHA__MASK				0xff000000
+#define A2XX_CLEAR_COLOR_ALPHA__SHIFT				24
+static inline uint32_t A2XX_CLEAR_COLOR_ALPHA(uint32_t val)
+{
+	return ((val) << A2XX_CLEAR_COLOR_ALPHA__SHIFT) & A2XX_CLEAR_COLOR_ALPHA__MASK;
+}
+
+#define REG_A2XX_A220_GRAS_CONTROL				0x00002210
+
+#define REG_A2XX_PA_SU_POINT_SIZE				0x00002280
+#define A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK			0x0000ffff
+#define A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT			0
+static inline uint32_t A2XX_PA_SU_POINT_SIZE_HEIGHT(float val)
+{
+	return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT) & A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK;
+}
+#define A2XX_PA_SU_POINT_SIZE_WIDTH__MASK			0xffff0000
+#define A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT			16
+static inline uint32_t A2XX_PA_SU_POINT_SIZE_WIDTH(float val)
+{
+	return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT) & A2XX_PA_SU_POINT_SIZE_WIDTH__MASK;
+}
+
+#define REG_A2XX_PA_SU_POINT_MINMAX				0x00002281
+#define A2XX_PA_SU_POINT_MINMAX_MIN__MASK			0x0000ffff
+#define A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT			0
+static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MIN(float val)
+{
+	return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MIN__MASK;
+}
+#define A2XX_PA_SU_POINT_MINMAX_MAX__MASK			0xffff0000
+#define A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT			16
+static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MAX(float val)
+{
+	return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MAX__MASK;
+}
+
+#define REG_A2XX_PA_SU_LINE_CNTL				0x00002282
+#define A2XX_PA_SU_LINE_CNTL_WIDTH__MASK			0x0000ffff
+#define A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT			0
+static inline uint32_t A2XX_PA_SU_LINE_CNTL_WIDTH(float val)
+{
+	return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT) & A2XX_PA_SU_LINE_CNTL_WIDTH__MASK;
+}
+
+#define REG_A2XX_PA_SC_LINE_STIPPLE				0x00002283
+#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK		0x0000ffff
+#define A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT		0
+static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_LINE_PATTERN__MASK;
+}
+#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK		0x00ff0000
+#define A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT		16
+static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_REPEAT_COUNT__MASK;
+}
+#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK		0x10000000
+#define A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT	28
+static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum a2xx_pa_sc_pattern_bit_order val)
+{
+	return ((val) << A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER__MASK;
+}
+#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK		0x60000000
+#define A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT		29
+static inline uint32_t A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum a2xx_pa_sc_auto_reset_cntl val)
+{
+	return ((val) << A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__SHIFT) & A2XX_PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL__MASK;
+}
+
+#define REG_A2XX_PA_SC_VIZ_QUERY				0x00002293
+
+#define REG_A2XX_VGT_ENHANCE					0x00002294
+
+#define REG_A2XX_PA_SC_LINE_CNTL				0x00002300
+#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK			0x0000ffff
+#define A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT			0
+static inline uint32_t A2XX_PA_SC_LINE_CNTL_BRES_CNTL(uint32_t val)
+{
+	return ((val) << A2XX_PA_SC_LINE_CNTL_BRES_CNTL__SHIFT) & A2XX_PA_SC_LINE_CNTL_BRES_CNTL__MASK;
+}
+#define A2XX_PA_SC_LINE_CNTL_USE_BRES_CNTL			0x00000100
+#define A2XX_PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH			0x00000200
+#define A2XX_PA_SC_LINE_CNTL_LAST_PIXEL				0x00000400
+
+#define REG_A2XX_PA_SC_AA_CONFIG				0x00002301
+
+#define REG_A2XX_PA_SU_VTX_CNTL					0x00002302
+#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK			0x00000001
+#define A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT			0
+static inline uint32_t A2XX_PA_SU_VTX_CNTL_PIX_CENTER(enum a2xx_pa_pixcenter val)
+{
+	return ((val) << A2XX_PA_SU_VTX_CNTL_PIX_CENTER__SHIFT) & A2XX_PA_SU_VTX_CNTL_PIX_CENTER__MASK;
+}
+#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK			0x00000006
+#define A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT			1
+static inline uint32_t A2XX_PA_SU_VTX_CNTL_ROUND_MODE(enum a2xx_pa_roundmode val)
+{
+	return ((val) << A2XX_PA_SU_VTX_CNTL_ROUND_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_ROUND_MODE__MASK;
+}
+#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK			0x00000380
+#define A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT			7
+static inline uint32_t A2XX_PA_SU_VTX_CNTL_QUANT_MODE(enum a2xx_pa_quantmode val)
+{
+	return ((val) << A2XX_PA_SU_VTX_CNTL_QUANT_MODE__SHIFT) & A2XX_PA_SU_VTX_CNTL_QUANT_MODE__MASK;
+}
+
+#define REG_A2XX_PA_CL_GB_VERT_CLIP_ADJ				0x00002303
+#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK			0xffffffff
+#define A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT			0
+static inline uint32_t A2XX_PA_CL_GB_VERT_CLIP_ADJ(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_GB_VERT_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_CLIP_ADJ__MASK;
+}
+
+#define REG_A2XX_PA_CL_GB_VERT_DISC_ADJ				0x00002304
+#define A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK			0xffffffff
+#define A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT			0
+static inline uint32_t A2XX_PA_CL_GB_VERT_DISC_ADJ(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_GB_VERT_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_VERT_DISC_ADJ__MASK;
+}
+
+#define REG_A2XX_PA_CL_GB_HORZ_CLIP_ADJ				0x00002305
+#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK			0xffffffff
+#define A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT			0
+static inline uint32_t A2XX_PA_CL_GB_HORZ_CLIP_ADJ(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_GB_HORZ_CLIP_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_CLIP_ADJ__MASK;
+}
+
+#define REG_A2XX_PA_CL_GB_HORZ_DISC_ADJ				0x00002306
+#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK			0xffffffff
+#define A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT			0
+static inline uint32_t A2XX_PA_CL_GB_HORZ_DISC_ADJ(float val)
+{
+	return ((fui(val)) << A2XX_PA_CL_GB_HORZ_DISC_ADJ__SHIFT) & A2XX_PA_CL_GB_HORZ_DISC_ADJ__MASK;
+}
+
+#define REG_A2XX_SQ_VS_CONST					0x00002307
+#define A2XX_SQ_VS_CONST_BASE__MASK				0x000001ff
+#define A2XX_SQ_VS_CONST_BASE__SHIFT				0
+static inline uint32_t A2XX_SQ_VS_CONST_BASE(uint32_t val)
+{
+	return ((val) << A2XX_SQ_VS_CONST_BASE__SHIFT) & A2XX_SQ_VS_CONST_BASE__MASK;
+}
+#define A2XX_SQ_VS_CONST_SIZE__MASK				0x001ff000
+#define A2XX_SQ_VS_CONST_SIZE__SHIFT				12
+static inline uint32_t A2XX_SQ_VS_CONST_SIZE(uint32_t val)
+{
+	return ((val) << A2XX_SQ_VS_CONST_SIZE__SHIFT) & A2XX_SQ_VS_CONST_SIZE__MASK;
+}
+
+#define REG_A2XX_SQ_PS_CONST					0x00002308
+#define A2XX_SQ_PS_CONST_BASE__MASK				0x000001ff
+#define A2XX_SQ_PS_CONST_BASE__SHIFT				0
+static inline uint32_t A2XX_SQ_PS_CONST_BASE(uint32_t val)
+{
+	return ((val) << A2XX_SQ_PS_CONST_BASE__SHIFT) & A2XX_SQ_PS_CONST_BASE__MASK;
+}
+#define A2XX_SQ_PS_CONST_SIZE__MASK				0x001ff000
+#define A2XX_SQ_PS_CONST_SIZE__SHIFT				12
+static inline uint32_t A2XX_SQ_PS_CONST_SIZE(uint32_t val)
+{
+	return ((val) << A2XX_SQ_PS_CONST_SIZE__SHIFT) & A2XX_SQ_PS_CONST_SIZE__MASK;
+}
+
+#define REG_A2XX_SQ_DEBUG_MISC_0				0x00002309
+
+#define REG_A2XX_SQ_DEBUG_MISC_1				0x0000230a
+
+#define REG_A2XX_PA_SC_AA_MASK					0x00002312
+
+#define REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL			0x00002316
+
+#define REG_A2XX_VGT_OUT_DEALLOC_CNTL				0x00002317
+
+#define REG_A2XX_RB_COPY_CONTROL				0x00002318
+#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK		0x00000007
+#define A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT		0
+static inline uint32_t A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT(enum a2xx_rb_copy_sample_select val)
+{
+	return ((val) << A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__SHIFT) & A2XX_RB_COPY_CONTROL_COPY_SAMPLE_SELECT__MASK;
+}
+#define A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE			0x00000008
+#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK			0x000000f0
+#define A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT			4
+static inline uint32_t A2XX_RB_COPY_CONTROL_CLEAR_MASK(uint32_t val)
+{
+	return ((val) << A2XX_RB_COPY_CONTROL_CLEAR_MASK__SHIFT) & A2XX_RB_COPY_CONTROL_CLEAR_MASK__MASK;
+}
+
+#define REG_A2XX_RB_COPY_DEST_BASE				0x00002319
+
+#define REG_A2XX_RB_COPY_DEST_PITCH				0x0000231a
+#define A2XX_RB_COPY_DEST_PITCH__MASK				0xffffffff
+#define A2XX_RB_COPY_DEST_PITCH__SHIFT				0
+static inline uint32_t A2XX_RB_COPY_DEST_PITCH(uint32_t val)
+{
+	return ((val >> 5) << A2XX_RB_COPY_DEST_PITCH__SHIFT) & A2XX_RB_COPY_DEST_PITCH__MASK;
+}
+
+#define REG_A2XX_RB_COPY_DEST_INFO				0x0000231b
+#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK		0x00000007
+#define A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT		0
+static inline uint32_t A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN(enum adreno_rb_surface_endian val)
+{
+	return ((val) << A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__SHIFT) & A2XX_RB_COPY_DEST_INFO_DEST_ENDIAN__MASK;
+}
+#define A2XX_RB_COPY_DEST_INFO_LINEAR				0x00000008
+#define A2XX_RB_COPY_DEST_INFO_FORMAT__MASK			0x000000f0
+#define A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT			4
+static inline uint32_t A2XX_RB_COPY_DEST_INFO_FORMAT(enum a2xx_colorformatx val)
+{
+	return ((val) << A2XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A2XX_RB_COPY_DEST_INFO_FORMAT__MASK;
+}
+#define A2XX_RB_COPY_DEST_INFO_SWAP__MASK			0x00000300
+#define A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT			8
+static inline uint32_t A2XX_RB_COPY_DEST_INFO_SWAP(uint32_t val)
+{
+	return ((val) << A2XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A2XX_RB_COPY_DEST_INFO_SWAP__MASK;
+}
+#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK		0x00000c00
+#define A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT		10
+static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val)
+{
+	return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK;
+}
+#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK		0x00003000
+#define A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT		12
+static inline uint32_t A2XX_RB_COPY_DEST_INFO_DITHER_TYPE(enum a2xx_rb_dither_type val)
+{
+	return ((val) << A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__SHIFT) & A2XX_RB_COPY_DEST_INFO_DITHER_TYPE__MASK;
+}
+#define A2XX_RB_COPY_DEST_INFO_WRITE_RED			0x00004000
+#define A2XX_RB_COPY_DEST_INFO_WRITE_GREEN			0x00008000
+#define A2XX_RB_COPY_DEST_INFO_WRITE_BLUE			0x00010000
+#define A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA			0x00020000
+
+#define REG_A2XX_RB_COPY_DEST_OFFSET				0x0000231c
+#define A2XX_RB_COPY_DEST_OFFSET_X__MASK			0x00001fff
+#define A2XX_RB_COPY_DEST_OFFSET_X__SHIFT			0
+static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_X(uint32_t val)
+{
+	return ((val) << A2XX_RB_COPY_DEST_OFFSET_X__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_X__MASK;
+}
+#define A2XX_RB_COPY_DEST_OFFSET_Y__MASK			0x03ffe000
+#define A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT			13
+static inline uint32_t A2XX_RB_COPY_DEST_OFFSET_Y(uint32_t val)
+{
+	return ((val) << A2XX_RB_COPY_DEST_OFFSET_Y__SHIFT) & A2XX_RB_COPY_DEST_OFFSET_Y__MASK;
+}
+
+#define REG_A2XX_RB_DEPTH_CLEAR					0x0000231d
+
+#define REG_A2XX_RB_SAMPLE_COUNT_CTL				0x00002324
+
+#define REG_A2XX_RB_COLOR_DEST_MASK				0x00002326
+
+#define REG_A2XX_A225_GRAS_UCP0X				0x00002340
+
+#define REG_A2XX_A225_GRAS_UCP5W				0x00002357
+
+#define REG_A2XX_A225_GRAS_UCP_ENABLED				0x00002360
+
+#define REG_A2XX_PA_SU_POLY_OFFSET_FRONT_SCALE			0x00002380
+
+#define REG_A2XX_PA_SU_POLY_OFFSET_BACK_OFFSET			0x00002383
+
+#define REG_A2XX_SQ_CONSTANT_0					0x00004000
+
+#define REG_A2XX_SQ_FETCH_0					0x00004800
+
+#define REG_A2XX_SQ_CF_BOOLEANS					0x00004900
+
+#define REG_A2XX_SQ_CF_LOOP					0x00004908
+
+#define REG_A2XX_COHER_SIZE_PM4					0x00000a29
+
+#define REG_A2XX_COHER_BASE_PM4					0x00000a2a
+
+#define REG_A2XX_COHER_STATUS_PM4				0x00000a2b
+
+#define REG_A2XX_SQ_TEX_0					0x00000000
+#define A2XX_SQ_TEX_0_CLAMP_X__MASK				0x00001c00
+#define A2XX_SQ_TEX_0_CLAMP_X__SHIFT				10
+static inline uint32_t A2XX_SQ_TEX_0_CLAMP_X(enum sq_tex_clamp val)
+{
+	return ((val) << A2XX_SQ_TEX_0_CLAMP_X__SHIFT) & A2XX_SQ_TEX_0_CLAMP_X__MASK;
+}
+#define A2XX_SQ_TEX_0_CLAMP_Y__MASK				0x0000e000
+#define A2XX_SQ_TEX_0_CLAMP_Y__SHIFT				13
+static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Y(enum sq_tex_clamp val)
+{
+	return ((val) << A2XX_SQ_TEX_0_CLAMP_Y__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Y__MASK;
+}
+#define A2XX_SQ_TEX_0_CLAMP_Z__MASK				0x00070000
+#define A2XX_SQ_TEX_0_CLAMP_Z__SHIFT				16
+static inline uint32_t A2XX_SQ_TEX_0_CLAMP_Z(enum sq_tex_clamp val)
+{
+	return ((val) << A2XX_SQ_TEX_0_CLAMP_Z__SHIFT) & A2XX_SQ_TEX_0_CLAMP_Z__MASK;
+}
+#define A2XX_SQ_TEX_0_PITCH__MASK				0xffc00000
+#define A2XX_SQ_TEX_0_PITCH__SHIFT				22
+static inline uint32_t A2XX_SQ_TEX_0_PITCH(uint32_t val)
+{
+	return ((val >> 5) << A2XX_SQ_TEX_0_PITCH__SHIFT) & A2XX_SQ_TEX_0_PITCH__MASK;
+}
+
+#define REG_A2XX_SQ_TEX_1					0x00000001
+
+#define REG_A2XX_SQ_TEX_2					0x00000002
+#define A2XX_SQ_TEX_2_WIDTH__MASK				0x00001fff
+#define A2XX_SQ_TEX_2_WIDTH__SHIFT				0
+static inline uint32_t A2XX_SQ_TEX_2_WIDTH(uint32_t val)
+{
+	return ((val) << A2XX_SQ_TEX_2_WIDTH__SHIFT) & A2XX_SQ_TEX_2_WIDTH__MASK;
+}
+#define A2XX_SQ_TEX_2_HEIGHT__MASK				0x03ffe000
+#define A2XX_SQ_TEX_2_HEIGHT__SHIFT				13
+static inline uint32_t A2XX_SQ_TEX_2_HEIGHT(uint32_t val)
+{
+	return ((val) << A2XX_SQ_TEX_2_HEIGHT__SHIFT) & A2XX_SQ_TEX_2_HEIGHT__MASK;
+}
+
+#define REG_A2XX_SQ_TEX_3					0x00000003
+#define A2XX_SQ_TEX_3_SWIZ_X__MASK				0x0000000e
+#define A2XX_SQ_TEX_3_SWIZ_X__SHIFT				1
+static inline uint32_t A2XX_SQ_TEX_3_SWIZ_X(enum sq_tex_swiz val)
+{
+	return ((val) << A2XX_SQ_TEX_3_SWIZ_X__SHIFT) & A2XX_SQ_TEX_3_SWIZ_X__MASK;
+}
+#define A2XX_SQ_TEX_3_SWIZ_Y__MASK				0x00000070
+#define A2XX_SQ_TEX_3_SWIZ_Y__SHIFT				4
+static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Y(enum sq_tex_swiz val)
+{
+	return ((val) << A2XX_SQ_TEX_3_SWIZ_Y__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Y__MASK;
+}
+#define A2XX_SQ_TEX_3_SWIZ_Z__MASK				0x00000380
+#define A2XX_SQ_TEX_3_SWIZ_Z__SHIFT				7
+static inline uint32_t A2XX_SQ_TEX_3_SWIZ_Z(enum sq_tex_swiz val)
+{
+	return ((val) << A2XX_SQ_TEX_3_SWIZ_Z__SHIFT) & A2XX_SQ_TEX_3_SWIZ_Z__MASK;
+}
+#define A2XX_SQ_TEX_3_SWIZ_W__MASK				0x00001c00
+#define A2XX_SQ_TEX_3_SWIZ_W__SHIFT				10
+static inline uint32_t A2XX_SQ_TEX_3_SWIZ_W(enum sq_tex_swiz val)
+{
+	return ((val) << A2XX_SQ_TEX_3_SWIZ_W__SHIFT) & A2XX_SQ_TEX_3_SWIZ_W__MASK;
+}
+#define A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK			0x00180000
+#define A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT			19
+static inline uint32_t A2XX_SQ_TEX_3_XY_MAG_FILTER(enum sq_tex_filter val)
+{
+	return ((val) << A2XX_SQ_TEX_3_XY_MAG_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MAG_FILTER__MASK;
+}
+#define A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK			0x00600000
+#define A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT			21
+static inline uint32_t A2XX_SQ_TEX_3_XY_MIN_FILTER(enum sq_tex_filter val)
+{
+	return ((val) << A2XX_SQ_TEX_3_XY_MIN_FILTER__SHIFT) & A2XX_SQ_TEX_3_XY_MIN_FILTER__MASK;
+}
+
+
+#endif /* A2XX_XML */
diff --git a/drivers/gpu/drm/msm/adreno/a3xx.xml.h b/drivers/gpu/drm/msm/adreno/a3xx.xml.h
new file mode 100644
index 0000000..d183516
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a3xx.xml.h
@@ -0,0 +1,2193 @@
+#ifndef A3XX_XML
+#define A3XX_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml              (    327 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml           (  30005 bytes, from 2013-07-19 21:30:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   8983 bytes, from 2013-07-24 01:38:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml           (  51415 bytes, from 2013-08-03 14:26:05)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum a3xx_render_mode {
+	RB_RENDERING_PASS = 0,
+	RB_TILING_PASS = 1,
+	RB_RESOLVE_PASS = 2,
+};
+
+enum a3xx_tile_mode {
+	LINEAR = 0,
+	TILE_32X32 = 2,
+};
+
+enum a3xx_threadmode {
+	MULTI = 0,
+	SINGLE = 1,
+};
+
+enum a3xx_instrbuffermode {
+	BUFFER = 1,
+};
+
+enum a3xx_threadsize {
+	TWO_QUADS = 0,
+	FOUR_QUADS = 1,
+};
+
+enum a3xx_state_block_id {
+	HLSQ_BLOCK_ID_TP_TEX = 2,
+	HLSQ_BLOCK_ID_TP_MIPMAP = 3,
+	HLSQ_BLOCK_ID_SP_VS = 4,
+	HLSQ_BLOCK_ID_SP_FS = 6,
+};
+
+enum a3xx_cache_opcode {
+	INVALIDATE = 1,
+};
+
+enum a3xx_vtx_fmt {
+	VFMT_FLOAT_32 = 0,
+	VFMT_FLOAT_32_32 = 1,
+	VFMT_FLOAT_32_32_32 = 2,
+	VFMT_FLOAT_32_32_32_32 = 3,
+	VFMT_FLOAT_16 = 4,
+	VFMT_FLOAT_16_16 = 5,
+	VFMT_FLOAT_16_16_16 = 6,
+	VFMT_FLOAT_16_16_16_16 = 7,
+	VFMT_FIXED_32 = 8,
+	VFMT_FIXED_32_32 = 9,
+	VFMT_FIXED_32_32_32 = 10,
+	VFMT_FIXED_32_32_32_32 = 11,
+	VFMT_SHORT_16 = 16,
+	VFMT_SHORT_16_16 = 17,
+	VFMT_SHORT_16_16_16 = 18,
+	VFMT_SHORT_16_16_16_16 = 19,
+	VFMT_USHORT_16 = 20,
+	VFMT_USHORT_16_16 = 21,
+	VFMT_USHORT_16_16_16 = 22,
+	VFMT_USHORT_16_16_16_16 = 23,
+	VFMT_NORM_SHORT_16 = 24,
+	VFMT_NORM_SHORT_16_16 = 25,
+	VFMT_NORM_SHORT_16_16_16 = 26,
+	VFMT_NORM_SHORT_16_16_16_16 = 27,
+	VFMT_NORM_USHORT_16 = 28,
+	VFMT_NORM_USHORT_16_16 = 29,
+	VFMT_NORM_USHORT_16_16_16 = 30,
+	VFMT_NORM_USHORT_16_16_16_16 = 31,
+	VFMT_UBYTE_8 = 40,
+	VFMT_UBYTE_8_8 = 41,
+	VFMT_UBYTE_8_8_8 = 42,
+	VFMT_UBYTE_8_8_8_8 = 43,
+	VFMT_NORM_UBYTE_8 = 44,
+	VFMT_NORM_UBYTE_8_8 = 45,
+	VFMT_NORM_UBYTE_8_8_8 = 46,
+	VFMT_NORM_UBYTE_8_8_8_8 = 47,
+	VFMT_BYTE_8 = 48,
+	VFMT_BYTE_8_8 = 49,
+	VFMT_BYTE_8_8_8 = 50,
+	VFMT_BYTE_8_8_8_8 = 51,
+	VFMT_NORM_BYTE_8 = 52,
+	VFMT_NORM_BYTE_8_8 = 53,
+	VFMT_NORM_BYTE_8_8_8 = 54,
+	VFMT_NORM_BYTE_8_8_8_8 = 55,
+	VFMT_UINT_10_10_10_2 = 60,
+	VFMT_NORM_UINT_10_10_10_2 = 61,
+	VFMT_INT_10_10_10_2 = 62,
+	VFMT_NORM_INT_10_10_10_2 = 63,
+};
+
+enum a3xx_tex_fmt {
+	TFMT_NORM_USHORT_565 = 4,
+	TFMT_NORM_USHORT_5551 = 6,
+	TFMT_NORM_USHORT_4444 = 7,
+	TFMT_NORM_UINT_X8Z24 = 10,
+	TFMT_NORM_UINT_NV12_UV_TILED = 17,
+	TFMT_NORM_UINT_NV12_Y_TILED = 19,
+	TFMT_NORM_UINT_NV12_UV = 21,
+	TFMT_NORM_UINT_NV12_Y = 23,
+	TFMT_NORM_UINT_I420_Y = 24,
+	TFMT_NORM_UINT_I420_U = 26,
+	TFMT_NORM_UINT_I420_V = 27,
+	TFMT_NORM_UINT_2_10_10_10 = 41,
+	TFMT_NORM_UINT_A8 = 44,
+	TFMT_NORM_UINT_L8_A8 = 47,
+	TFMT_NORM_UINT_8 = 48,
+	TFMT_NORM_UINT_8_8 = 49,
+	TFMT_NORM_UINT_8_8_8 = 50,
+	TFMT_NORM_UINT_8_8_8_8 = 51,
+	TFMT_FLOAT_16 = 64,
+	TFMT_FLOAT_16_16 = 65,
+	TFMT_FLOAT_16_16_16_16 = 67,
+	TFMT_FLOAT_32 = 84,
+	TFMT_FLOAT_32_32 = 85,
+	TFMT_FLOAT_32_32_32_32 = 87,
+};
+
+enum a3xx_tex_fetchsize {
+	TFETCH_DISABLE = 0,
+	TFETCH_1_BYTE = 1,
+	TFETCH_2_BYTE = 2,
+	TFETCH_4_BYTE = 3,
+	TFETCH_8_BYTE = 4,
+	TFETCH_16_BYTE = 5,
+};
+
+enum a3xx_color_fmt {
+	RB_R8G8B8_UNORM = 4,
+	RB_R8G8B8A8_UNORM = 8,
+	RB_Z16_UNORM = 12,
+	RB_A8_UNORM = 20,
+};
+
+enum a3xx_color_swap {
+	WZYX = 0,
+	WXYZ = 1,
+	ZYXW = 2,
+	XYZW = 3,
+};
+
+enum a3xx_msaa_samples {
+	MSAA_ONE = 0,
+	MSAA_TWO = 1,
+	MSAA_FOUR = 2,
+};
+
+enum a3xx_sp_perfcounter_select {
+	SP_FS_CFLOW_INSTRUCTIONS = 12,
+	SP_FS_FULL_ALU_INSTRUCTIONS = 14,
+	SP0_ICL1_MISSES = 26,
+	SP_ALU_ACTIVE_CYCLES = 29,
+};
+
+enum adreno_rb_copy_control_mode {
+	RB_COPY_RESOLVE = 1,
+	RB_COPY_DEPTH_STENCIL = 5,
+};
+
+enum a3xx_tex_filter {
+	A3XX_TEX_NEAREST = 0,
+	A3XX_TEX_LINEAR = 1,
+};
+
+enum a3xx_tex_clamp {
+	A3XX_TEX_REPEAT = 0,
+	A3XX_TEX_CLAMP_TO_EDGE = 1,
+	A3XX_TEX_MIRROR_REPEAT = 2,
+	A3XX_TEX_CLAMP_NONE = 3,
+};
+
+enum a3xx_tex_swiz {
+	A3XX_TEX_X = 0,
+	A3XX_TEX_Y = 1,
+	A3XX_TEX_Z = 2,
+	A3XX_TEX_W = 3,
+	A3XX_TEX_ZERO = 4,
+	A3XX_TEX_ONE = 5,
+};
+
+enum a3xx_tex_type {
+	A3XX_TEX_1D = 0,
+	A3XX_TEX_2D = 1,
+	A3XX_TEX_CUBE = 2,
+	A3XX_TEX_3D = 3,
+};
+
+#define A3XX_INT0_RBBM_GPU_IDLE					0x00000001
+#define A3XX_INT0_RBBM_AHB_ERROR				0x00000002
+#define A3XX_INT0_RBBM_REG_TIMEOUT				0x00000004
+#define A3XX_INT0_RBBM_ME_MS_TIMEOUT				0x00000008
+#define A3XX_INT0_RBBM_PFP_MS_TIMEOUT				0x00000010
+#define A3XX_INT0_RBBM_ATB_BUS_OVERFLOW				0x00000020
+#define A3XX_INT0_VFD_ERROR					0x00000040
+#define A3XX_INT0_CP_SW_INT					0x00000080
+#define A3XX_INT0_CP_T0_PACKET_IN_IB				0x00000100
+#define A3XX_INT0_CP_OPCODE_ERROR				0x00000200
+#define A3XX_INT0_CP_RESERVED_BIT_ERROR				0x00000400
+#define A3XX_INT0_CP_HW_FAULT					0x00000800
+#define A3XX_INT0_CP_DMA					0x00001000
+#define A3XX_INT0_CP_IB2_INT					0x00002000
+#define A3XX_INT0_CP_IB1_INT					0x00004000
+#define A3XX_INT0_CP_RB_INT					0x00008000
+#define A3XX_INT0_CP_REG_PROTECT_FAULT				0x00010000
+#define A3XX_INT0_CP_RB_DONE_TS					0x00020000
+#define A3XX_INT0_CP_VS_DONE_TS					0x00040000
+#define A3XX_INT0_CP_PS_DONE_TS					0x00080000
+#define A3XX_INT0_CACHE_FLUSH_TS				0x00100000
+#define A3XX_INT0_CP_AHB_ERROR_HALT				0x00200000
+#define A3XX_INT0_MISC_HANG_DETECT				0x01000000
+#define A3XX_INT0_UCHE_OOB_ACCESS				0x02000000
+#define REG_A3XX_RBBM_HW_VERSION				0x00000000
+
+#define REG_A3XX_RBBM_HW_RELEASE				0x00000001
+
+#define REG_A3XX_RBBM_HW_CONFIGURATION				0x00000002
+
+#define REG_A3XX_RBBM_CLOCK_CTL					0x00000010
+
+#define REG_A3XX_RBBM_SP_HYST_CNT				0x00000012
+
+#define REG_A3XX_RBBM_SW_RESET_CMD				0x00000018
+
+#define REG_A3XX_RBBM_AHB_CTL0					0x00000020
+
+#define REG_A3XX_RBBM_AHB_CTL1					0x00000021
+
+#define REG_A3XX_RBBM_AHB_CMD					0x00000022
+
+#define REG_A3XX_RBBM_AHB_ERROR_STATUS				0x00000027
+
+#define REG_A3XX_RBBM_GPR0_CTL					0x0000002e
+
+#define REG_A3XX_RBBM_STATUS					0x00000030
+#define A3XX_RBBM_STATUS_HI_BUSY				0x00000001
+#define A3XX_RBBM_STATUS_CP_ME_BUSY				0x00000002
+#define A3XX_RBBM_STATUS_CP_PFP_BUSY				0x00000004
+#define A3XX_RBBM_STATUS_CP_NRT_BUSY				0x00004000
+#define A3XX_RBBM_STATUS_VBIF_BUSY				0x00008000
+#define A3XX_RBBM_STATUS_TSE_BUSY				0x00010000
+#define A3XX_RBBM_STATUS_RAS_BUSY				0x00020000
+#define A3XX_RBBM_STATUS_RB_BUSY				0x00040000
+#define A3XX_RBBM_STATUS_PC_DCALL_BUSY				0x00080000
+#define A3XX_RBBM_STATUS_PC_VSD_BUSY				0x00100000
+#define A3XX_RBBM_STATUS_VFD_BUSY				0x00200000
+#define A3XX_RBBM_STATUS_VPC_BUSY				0x00400000
+#define A3XX_RBBM_STATUS_UCHE_BUSY				0x00800000
+#define A3XX_RBBM_STATUS_SP_BUSY				0x01000000
+#define A3XX_RBBM_STATUS_TPL1_BUSY				0x02000000
+#define A3XX_RBBM_STATUS_MARB_BUSY				0x04000000
+#define A3XX_RBBM_STATUS_VSC_BUSY				0x08000000
+#define A3XX_RBBM_STATUS_ARB_BUSY				0x10000000
+#define A3XX_RBBM_STATUS_HLSQ_BUSY				0x20000000
+#define A3XX_RBBM_STATUS_GPU_BUSY_NOHC				0x40000000
+#define A3XX_RBBM_STATUS_GPU_BUSY				0x80000000
+
+#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL			0x00000033
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL			0x00000050
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0			0x00000051
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1			0x00000054
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2			0x00000057
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3			0x0000005a
+
+#define REG_A3XX_RBBM_INT_CLEAR_CMD				0x00000061
+
+#define REG_A3XX_RBBM_INT_0_MASK				0x00000063
+
+#define REG_A3XX_RBBM_INT_0_STATUS				0x00000064
+
+#define REG_A3XX_RBBM_PERFCTR_CTL				0x00000080
+
+#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0				0x00000081
+
+#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD1				0x00000082
+
+#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO			0x00000084
+
+#define REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI			0x00000085
+
+#define REG_A3XX_RBBM_PERFCOUNTER0_SELECT			0x00000086
+
+#define REG_A3XX_RBBM_PERFCOUNTER1_SELECT			0x00000087
+
+#define REG_A3XX_RBBM_GPU_BUSY_MASKED				0x00000088
+
+#define REG_A3XX_RBBM_PERFCTR_CP_0_LO				0x00000090
+
+#define REG_A3XX_RBBM_PERFCTR_CP_0_HI				0x00000091
+
+#define REG_A3XX_RBBM_PERFCTR_RBBM_0_LO				0x00000092
+
+#define REG_A3XX_RBBM_PERFCTR_RBBM_0_HI				0x00000093
+
+#define REG_A3XX_RBBM_PERFCTR_RBBM_1_LO				0x00000094
+
+#define REG_A3XX_RBBM_PERFCTR_RBBM_1_HI				0x00000095
+
+#define REG_A3XX_RBBM_PERFCTR_PC_0_LO				0x00000096
+
+#define REG_A3XX_RBBM_PERFCTR_PC_0_HI				0x00000097
+
+#define REG_A3XX_RBBM_PERFCTR_PC_1_LO				0x00000098
+
+#define REG_A3XX_RBBM_PERFCTR_PC_1_HI				0x00000099
+
+#define REG_A3XX_RBBM_PERFCTR_PC_2_LO				0x0000009a
+
+#define REG_A3XX_RBBM_PERFCTR_PC_2_HI				0x0000009b
+
+#define REG_A3XX_RBBM_PERFCTR_PC_3_LO				0x0000009c
+
+#define REG_A3XX_RBBM_PERFCTR_PC_3_HI				0x0000009d
+
+#define REG_A3XX_RBBM_PERFCTR_VFD_0_LO				0x0000009e
+
+#define REG_A3XX_RBBM_PERFCTR_VFD_0_HI				0x0000009f
+
+#define REG_A3XX_RBBM_PERFCTR_VFD_1_LO				0x000000a0
+
+#define REG_A3XX_RBBM_PERFCTR_VFD_1_HI				0x000000a1
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_LO				0x000000a2
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_0_HI				0x000000a3
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_LO				0x000000a4
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_1_HI				0x000000a5
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_LO				0x000000a6
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_2_HI				0x000000a7
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_LO				0x000000a8
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_3_HI				0x000000a9
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_LO				0x000000aa
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_4_HI				0x000000ab
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_LO				0x000000ac
+
+#define REG_A3XX_RBBM_PERFCTR_HLSQ_5_HI				0x000000ad
+
+#define REG_A3XX_RBBM_PERFCTR_VPC_0_LO				0x000000ae
+
+#define REG_A3XX_RBBM_PERFCTR_VPC_0_HI				0x000000af
+
+#define REG_A3XX_RBBM_PERFCTR_VPC_1_LO				0x000000b0
+
+#define REG_A3XX_RBBM_PERFCTR_VPC_1_HI				0x000000b1
+
+#define REG_A3XX_RBBM_PERFCTR_TSE_0_LO				0x000000b2
+
+#define REG_A3XX_RBBM_PERFCTR_TSE_0_HI				0x000000b3
+
+#define REG_A3XX_RBBM_PERFCTR_TSE_1_LO				0x000000b4
+
+#define REG_A3XX_RBBM_PERFCTR_TSE_1_HI				0x000000b5
+
+#define REG_A3XX_RBBM_PERFCTR_RAS_0_LO				0x000000b6
+
+#define REG_A3XX_RBBM_PERFCTR_RAS_0_HI				0x000000b7
+
+#define REG_A3XX_RBBM_PERFCTR_RAS_1_LO				0x000000b8
+
+#define REG_A3XX_RBBM_PERFCTR_RAS_1_HI				0x000000b9
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_0_LO				0x000000ba
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_0_HI				0x000000bb
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_1_LO				0x000000bc
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_1_HI				0x000000bd
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_2_LO				0x000000be
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_2_HI				0x000000bf
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_3_LO				0x000000c0
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_3_HI				0x000000c1
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_4_LO				0x000000c2
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_4_HI				0x000000c3
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_5_LO				0x000000c4
+
+#define REG_A3XX_RBBM_PERFCTR_UCHE_5_HI				0x000000c5
+
+#define REG_A3XX_RBBM_PERFCTR_TP_0_LO				0x000000c6
+
+#define REG_A3XX_RBBM_PERFCTR_TP_0_HI				0x000000c7
+
+#define REG_A3XX_RBBM_PERFCTR_TP_1_LO				0x000000c8
+
+#define REG_A3XX_RBBM_PERFCTR_TP_1_HI				0x000000c9
+
+#define REG_A3XX_RBBM_PERFCTR_TP_2_LO				0x000000ca
+
+#define REG_A3XX_RBBM_PERFCTR_TP_2_HI				0x000000cb
+
+#define REG_A3XX_RBBM_PERFCTR_TP_3_LO				0x000000cc
+
+#define REG_A3XX_RBBM_PERFCTR_TP_3_HI				0x000000cd
+
+#define REG_A3XX_RBBM_PERFCTR_TP_4_LO				0x000000ce
+
+#define REG_A3XX_RBBM_PERFCTR_TP_4_HI				0x000000cf
+
+#define REG_A3XX_RBBM_PERFCTR_TP_5_LO				0x000000d0
+
+#define REG_A3XX_RBBM_PERFCTR_TP_5_HI				0x000000d1
+
+#define REG_A3XX_RBBM_PERFCTR_SP_0_LO				0x000000d2
+
+#define REG_A3XX_RBBM_PERFCTR_SP_0_HI				0x000000d3
+
+#define REG_A3XX_RBBM_PERFCTR_SP_1_LO				0x000000d4
+
+#define REG_A3XX_RBBM_PERFCTR_SP_1_HI				0x000000d5
+
+#define REG_A3XX_RBBM_PERFCTR_SP_2_LO				0x000000d6
+
+#define REG_A3XX_RBBM_PERFCTR_SP_2_HI				0x000000d7
+
+#define REG_A3XX_RBBM_PERFCTR_SP_3_LO				0x000000d8
+
+#define REG_A3XX_RBBM_PERFCTR_SP_3_HI				0x000000d9
+
+#define REG_A3XX_RBBM_PERFCTR_SP_4_LO				0x000000da
+
+#define REG_A3XX_RBBM_PERFCTR_SP_4_HI				0x000000db
+
+#define REG_A3XX_RBBM_PERFCTR_SP_5_LO				0x000000dc
+
+#define REG_A3XX_RBBM_PERFCTR_SP_5_HI				0x000000dd
+
+#define REG_A3XX_RBBM_PERFCTR_SP_6_LO				0x000000de
+
+#define REG_A3XX_RBBM_PERFCTR_SP_6_HI				0x000000df
+
+#define REG_A3XX_RBBM_PERFCTR_SP_7_LO				0x000000e0
+
+#define REG_A3XX_RBBM_PERFCTR_SP_7_HI				0x000000e1
+
+#define REG_A3XX_RBBM_PERFCTR_RB_0_LO				0x000000e2
+
+#define REG_A3XX_RBBM_PERFCTR_RB_0_HI				0x000000e3
+
+#define REG_A3XX_RBBM_PERFCTR_RB_1_LO				0x000000e4
+
+#define REG_A3XX_RBBM_PERFCTR_RB_1_HI				0x000000e5
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_0_LO				0x000000ea
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_0_HI				0x000000eb
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO				0x000000ec
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI				0x000000ed
+
+#define REG_A3XX_RBBM_RBBM_CTL					0x00000100
+
+#define REG_A3XX_RBBM_DEBUG_BUS_CTL				0x00000111
+
+#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS			0x00000112
+
+#define REG_A3XX_CP_PFP_UCODE_ADDR				0x000001c9
+
+#define REG_A3XX_CP_PFP_UCODE_DATA				0x000001ca
+
+#define REG_A3XX_CP_ROQ_ADDR					0x000001cc
+
+#define REG_A3XX_CP_ROQ_DATA					0x000001cd
+
+#define REG_A3XX_CP_MERCIU_ADDR					0x000001d1
+
+#define REG_A3XX_CP_MERCIU_DATA					0x000001d2
+
+#define REG_A3XX_CP_MERCIU_DATA2				0x000001d3
+
+#define REG_A3XX_CP_MEQ_ADDR					0x000001da
+
+#define REG_A3XX_CP_MEQ_DATA					0x000001db
+
+#define REG_A3XX_CP_PERFCOUNTER_SELECT				0x00000445
+
+#define REG_A3XX_CP_HW_FAULT					0x0000045c
+
+#define REG_A3XX_CP_PROTECT_CTRL				0x0000045e
+
+#define REG_A3XX_CP_PROTECT_STATUS				0x0000045f
+
+static inline uint32_t REG_A3XX_CP_PROTECT(uint32_t i0) { return 0x00000460 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460 + 0x1*i0; }
+
+#define REG_A3XX_CP_AHB_FAULT					0x0000054d
+
+#define REG_A3XX_GRAS_CL_CLIP_CNTL				0x00002040
+#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER			0x00001000
+#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE			0x00010000
+#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE		0x00020000
+#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE		0x00080000
+#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE			0x00100000
+#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE		0x00200000
+
+#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ				0x00002044
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK			0x000003ff
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT			0
+static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK;
+}
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK			0x000ffc00
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT			10
+static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_XOFFSET				0x00002048
+#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK			0xffffffff
+#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT			0
+static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val)
+{
+	return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_XSCALE				0x00002049
+#define A3XX_GRAS_CL_VPORT_XSCALE__MASK				0xffffffff
+#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT			0
+static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val)
+{
+	return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_YOFFSET				0x0000204a
+#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK			0xffffffff
+#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT			0
+static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val)
+{
+	return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_YSCALE				0x0000204b
+#define A3XX_GRAS_CL_VPORT_YSCALE__MASK				0xffffffff
+#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT			0
+static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val)
+{
+	return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET				0x0000204c
+#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK			0xffffffff
+#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT			0
+static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val)
+{
+	return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_ZSCALE				0x0000204d
+#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK				0xffffffff
+#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT			0
+static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val)
+{
+	return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_POINT_MINMAX				0x00002068
+
+#define REG_A3XX_GRAS_SU_POINT_SIZE				0x00002069
+
+#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE			0x0000206c
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK		0x00ffffff
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT		0
+static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
+{
+	return ((((uint32_t)(val * 40.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET			0x0000206d
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK			0xffffffff
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT			0
+static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val)
+{
+	return ((((uint32_t)(val * 44.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_MODE_CONTROL				0x00002070
+#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT			0x00000001
+#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK			0x00000002
+#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK		0x000007fc
+#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT		2
+static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK;
+}
+#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET			0x00000800
+
+#define REG_A3XX_GRAS_SC_CONTROL				0x00002072
+#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK			0x000000f0
+#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT			4
+static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val)
+{
+	return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK;
+}
+#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK			0x00000f00
+#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT		8
+static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val)
+{
+	return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK;
+}
+#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK			0x0000f000
+#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT			12
+static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL			0x00002074
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE	0x80000000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK			0x00007fff
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT			0
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK;
+}
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK			0x7fff0000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT			16
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR			0x00002075
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE	0x80000000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK			0x00007fff
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT			0
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK;
+}
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK			0x7fff0000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT			16
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL			0x00002079
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE	0x80000000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK			0x00007fff
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT			0
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK;
+}
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK			0x7fff0000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT			16
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR			0x0000207a
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE	0x80000000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK			0x00007fff
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT			0
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK;
+}
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK			0x7fff0000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT			16
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val)
+{
+	return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A3XX_RB_MODE_CONTROL				0x000020c0
+#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS			0x00000080
+#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK			0x00000700
+#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT			8
+static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val)
+{
+	return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK;
+}
+#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE		0x00008000
+#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE		0x00010000
+
+#define REG_A3XX_RB_RENDER_CONTROL				0x000020c1
+#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK			0x00000ff0
+#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT			4
+static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val)
+{
+	return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK;
+}
+#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE		0x00001000
+#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM			0x00002000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK		0x07000000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT		24
+static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val)
+{
+	return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
+}
+
+#define REG_A3XX_RB_MSAA_CONTROL				0x000020c2
+#define A3XX_RB_MSAA_CONTROL_DISABLE				0x00000400
+#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK			0x0000f000
+#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT			12
+static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val)
+{
+	return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK;
+}
+#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK			0xffff0000
+#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT			16
+static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val)
+{
+	return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK;
+}
+
+#define REG_A3XX_UNKNOWN_20C3					0x000020c3
+
+static inline uint32_t REG_A3XX_RB_MRT(uint32_t i0) { return 0x000020c4 + 0x4*i0; }
+
+static inline uint32_t REG_A3XX_RB_MRT_CONTROL(uint32_t i0) { return 0x000020c4 + 0x4*i0; }
+#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE			0x00000008
+#define A3XX_RB_MRT_CONTROL_BLEND				0x00000010
+#define A3XX_RB_MRT_CONTROL_BLEND2				0x00000020
+#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK			0x00000f00
+#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT			8
+static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(uint32_t val)
+{
+	return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK;
+}
+#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK			0x00003000
+#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT			12
+static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val)
+{
+	return ((val) << A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK;
+}
+#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK		0x0f000000
+#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT		24
+static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val)
+{
+	return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+}
+
+static inline uint32_t REG_A3XX_RB_MRT_BUF_INFO(uint32_t i0) { return 0x000020c5 + 0x4*i0; }
+#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK			0x0000003f
+#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT		0
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val)
+{
+	return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK		0x000000c0
+#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT		6
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val)
+{
+	return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK			0x00000c00
+#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT			10
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val)
+{
+	return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK		0xfffe0000
+#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT		17
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val)
+{
+	return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK;
+}
+
+static inline uint32_t REG_A3XX_RB_MRT_BUF_BASE(uint32_t i0) { return 0x000020c6 + 0x4*i0; }
+#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK		0xfffffff0
+#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT		4
+static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val)
+{
+	return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK;
+}
+
+static inline uint32_t REG_A3XX_RB_MRT_BLEND_CONTROL(uint32_t i0) { return 0x000020c7 + 0x4*i0; }
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK		0x0000001f
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT		0
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val)
+{
+	return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK	0x000000e0
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT	5
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum adreno_rb_blend_opcode val)
+{
+	return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK		0x00001f00
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT	8
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val)
+{
+	return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK	0x001f0000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT	16
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val)
+{
+	return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK	0x00e00000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT	21
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum adreno_rb_blend_opcode val)
+{
+	return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK	0x1f000000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT	24
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val)
+{
+	return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE			0x20000000
+
+#define REG_A3XX_RB_BLEND_RED					0x000020e4
+#define A3XX_RB_BLEND_RED_UINT__MASK				0x000000ff
+#define A3XX_RB_BLEND_RED_UINT__SHIFT				0
+static inline uint32_t A3XX_RB_BLEND_RED_UINT(uint32_t val)
+{
+	return ((val) << A3XX_RB_BLEND_RED_UINT__SHIFT) & A3XX_RB_BLEND_RED_UINT__MASK;
+}
+#define A3XX_RB_BLEND_RED_FLOAT__MASK				0xffff0000
+#define A3XX_RB_BLEND_RED_FLOAT__SHIFT				16
+static inline uint32_t A3XX_RB_BLEND_RED_FLOAT(float val)
+{
+	return ((util_float_to_half(val)) << A3XX_RB_BLEND_RED_FLOAT__SHIFT) & A3XX_RB_BLEND_RED_FLOAT__MASK;
+}
+
+#define REG_A3XX_RB_BLEND_GREEN					0x000020e5
+#define A3XX_RB_BLEND_GREEN_UINT__MASK				0x000000ff
+#define A3XX_RB_BLEND_GREEN_UINT__SHIFT				0
+static inline uint32_t A3XX_RB_BLEND_GREEN_UINT(uint32_t val)
+{
+	return ((val) << A3XX_RB_BLEND_GREEN_UINT__SHIFT) & A3XX_RB_BLEND_GREEN_UINT__MASK;
+}
+#define A3XX_RB_BLEND_GREEN_FLOAT__MASK				0xffff0000
+#define A3XX_RB_BLEND_GREEN_FLOAT__SHIFT			16
+static inline uint32_t A3XX_RB_BLEND_GREEN_FLOAT(float val)
+{
+	return ((util_float_to_half(val)) << A3XX_RB_BLEND_GREEN_FLOAT__SHIFT) & A3XX_RB_BLEND_GREEN_FLOAT__MASK;
+}
+
+#define REG_A3XX_RB_BLEND_BLUE					0x000020e6
+#define A3XX_RB_BLEND_BLUE_UINT__MASK				0x000000ff
+#define A3XX_RB_BLEND_BLUE_UINT__SHIFT				0
+static inline uint32_t A3XX_RB_BLEND_BLUE_UINT(uint32_t val)
+{
+	return ((val) << A3XX_RB_BLEND_BLUE_UINT__SHIFT) & A3XX_RB_BLEND_BLUE_UINT__MASK;
+}
+#define A3XX_RB_BLEND_BLUE_FLOAT__MASK				0xffff0000
+#define A3XX_RB_BLEND_BLUE_FLOAT__SHIFT				16
+static inline uint32_t A3XX_RB_BLEND_BLUE_FLOAT(float val)
+{
+	return ((util_float_to_half(val)) << A3XX_RB_BLEND_BLUE_FLOAT__SHIFT) & A3XX_RB_BLEND_BLUE_FLOAT__MASK;
+}
+
+#define REG_A3XX_RB_BLEND_ALPHA					0x000020e7
+#define A3XX_RB_BLEND_ALPHA_UINT__MASK				0x000000ff
+#define A3XX_RB_BLEND_ALPHA_UINT__SHIFT				0
+static inline uint32_t A3XX_RB_BLEND_ALPHA_UINT(uint32_t val)
+{
+	return ((val) << A3XX_RB_BLEND_ALPHA_UINT__SHIFT) & A3XX_RB_BLEND_ALPHA_UINT__MASK;
+}
+#define A3XX_RB_BLEND_ALPHA_FLOAT__MASK				0xffff0000
+#define A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT			16
+static inline uint32_t A3XX_RB_BLEND_ALPHA_FLOAT(float val)
+{
+	return ((util_float_to_half(val)) << A3XX_RB_BLEND_ALPHA_FLOAT__SHIFT) & A3XX_RB_BLEND_ALPHA_FLOAT__MASK;
+}
+
+#define REG_A3XX_UNKNOWN_20E8					0x000020e8
+
+#define REG_A3XX_UNKNOWN_20E9					0x000020e9
+
+#define REG_A3XX_UNKNOWN_20EA					0x000020ea
+
+#define REG_A3XX_UNKNOWN_20EB					0x000020eb
+
+#define REG_A3XX_RB_COPY_CONTROL				0x000020ec
+#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK			0x00000003
+#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT		0
+static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val)
+{
+	return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK;
+}
+#define A3XX_RB_COPY_CONTROL_MODE__MASK				0x00000070
+#define A3XX_RB_COPY_CONTROL_MODE__SHIFT			4
+static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val)
+{
+	return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK;
+}
+#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK			0xfffffc00
+#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT			10
+static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val)
+{
+	return ((val >> 10) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK;
+}
+
+#define REG_A3XX_RB_COPY_DEST_BASE				0x000020ed
+#define A3XX_RB_COPY_DEST_BASE_BASE__MASK			0xfffffff0
+#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT			4
+static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val)
+{
+	return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK;
+}
+
+#define REG_A3XX_RB_COPY_DEST_PITCH				0x000020ee
+#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK			0xffffffff
+#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT			0
+static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val)
+{
+	return ((val >> 5) << A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK;
+}
+
+#define REG_A3XX_RB_COPY_DEST_INFO				0x000020ef
+#define A3XX_RB_COPY_DEST_INFO_TILE__MASK			0x00000003
+#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT			0
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val)
+{
+	return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK			0x000000fc
+#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT			2
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val)
+{
+	return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK			0x00000300
+#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT			8
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val)
+{
+	return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK		0x0003c000
+#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT		14
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val)
+{
+	return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK			0x001c0000
+#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT			18
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val)
+{
+	return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK;
+}
+
+#define REG_A3XX_RB_DEPTH_CONTROL				0x00002100
+#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE				0x00000002
+#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE			0x00000004
+#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_ENABLE			0x00000008
+#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK			0x00000070
+#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT			4
+static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
+{
+	return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
+}
+#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE				0x00000080
+#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE			0x80000000
+
+#define REG_A3XX_UNKNOWN_2101					0x00002101
+
+#define REG_A3XX_RB_DEPTH_INFO					0x00002102
+#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK			0x00000001
+#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT			0
+static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val)
+{
+	return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK;
+}
+#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK			0xfffff800
+#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT			11
+static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val)
+{
+	return ((val >> 10) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK;
+}
+
+#define REG_A3XX_RB_DEPTH_PITCH					0x00002103
+#define A3XX_RB_DEPTH_PITCH__MASK				0xffffffff
+#define A3XX_RB_DEPTH_PITCH__SHIFT				0
+static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val)
+{
+	return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK;
+}
+
+#define REG_A3XX_RB_STENCIL_CONTROL				0x00002104
+#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE			0x00000001
+#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF		0x00000004
+#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK			0x00000700
+#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT			8
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val)
+{
+	return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK			0x00003800
+#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT			11
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val)
+{
+	return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK			0x0001c000
+#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT			14
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val)
+{
+	return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK			0x000e0000
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT			17
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val)
+{
+	return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK			0x00700000
+#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT			20
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val)
+{
+	return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK			0x03800000
+#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT			23
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val)
+{
+	return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK			0x1c000000
+#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT			26
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val)
+{
+	return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK			0xe0000000
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT			29
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val)
+{
+	return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK;
+}
+
+#define REG_A3XX_UNKNOWN_2105					0x00002105
+
+#define REG_A3XX_UNKNOWN_2106					0x00002106
+
+#define REG_A3XX_UNKNOWN_2107					0x00002107
+
+#define REG_A3XX_RB_STENCILREFMASK				0x00002108
+#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK			0x000000ff
+#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT		0
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val)
+{
+	return ((val) << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILREF__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK		0x0000ff00
+#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT		8
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val)
+{
+	return ((val) << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK		0x00ff0000
+#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT		16
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val)
+{
+	return ((val) << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK;
+}
+
+#define REG_A3XX_RB_STENCILREFMASK_BF				0x00002109
+#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK		0x000000ff
+#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT		0
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val)
+{
+	return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK		0x0000ff00
+#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT		8
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val)
+{
+	return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK	0x00ff0000
+#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT	16
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val)
+{
+	return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK;
+}
+
+#define REG_A3XX_PA_SC_WINDOW_OFFSET				0x0000210e
+#define A3XX_PA_SC_WINDOW_OFFSET_X__MASK			0x0000ffff
+#define A3XX_PA_SC_WINDOW_OFFSET_X__SHIFT			0
+static inline uint32_t A3XX_PA_SC_WINDOW_OFFSET_X(uint32_t val)
+{
+	return ((val) << A3XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A3XX_PA_SC_WINDOW_OFFSET_X__MASK;
+}
+#define A3XX_PA_SC_WINDOW_OFFSET_Y__MASK			0xffff0000
+#define A3XX_PA_SC_WINDOW_OFFSET_Y__SHIFT			16
+static inline uint32_t A3XX_PA_SC_WINDOW_OFFSET_Y(uint32_t val)
+{
+	return ((val) << A3XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A3XX_PA_SC_WINDOW_OFFSET_Y__MASK;
+}
+
+#define REG_A3XX_PC_VSTREAM_CONTROL				0x000021e4
+
+#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL			0x000021ea
+
+#define REG_A3XX_PC_PRIM_VTX_CNTL				0x000021ec
+#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK		0x0000001f
+#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT		0
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val)
+{
+	return ((val) << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK	0x000000e0
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT	5
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+	return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK		0x00000700
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT	8
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+	return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST		0x02000000
+
+#define REG_A3XX_PC_RESTART_INDEX				0x000021ed
+
+#define REG_A3XX_HLSQ_CONTROL_0_REG				0x00002200
+#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK		0x00000010
+#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT		4
+static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
+{
+	return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
+}
+#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE		0x00000040
+#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART			0x00000200
+#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2			0x00000400
+#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE			0x04000000
+#define A3XX_HLSQ_CONTROL_0_REG_CONSTSWITCHMODE			0x08000000
+#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE		0x10000000
+#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE		0x20000000
+#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE			0x40000000
+#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT			0x80000000
+
+#define REG_A3XX_HLSQ_CONTROL_1_REG				0x00002201
+#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK		0x00000040
+#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT		6
+static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val)
+{
+	return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK;
+}
+#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE		0x00000100
+#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1			0x00000200
+
+#define REG_A3XX_HLSQ_CONTROL_2_REG				0x00002202
+#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK	0xfc000000
+#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT	26
+static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK;
+}
+
+#define REG_A3XX_HLSQ_CONTROL_3_REG				0x00002203
+
+#define REG_A3XX_HLSQ_VS_CONTROL_REG				0x00002204
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK		0x00000fff
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT		0
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK;
+}
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK		0x00fff000
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT	12
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK;
+}
+#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK		0xff000000
+#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT		24
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK;
+}
+
+#define REG_A3XX_HLSQ_FS_CONTROL_REG				0x00002205
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK		0x00000fff
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT		0
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK;
+}
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK		0x00fff000
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT	12
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK;
+}
+#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK		0xff000000
+#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT		24
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK;
+}
+
+#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG			0x00002206
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK	0x0000ffff
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT	0
+static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK;
+}
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK	0xffff0000
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT	16
+static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK;
+}
+
+#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG			0x00002207
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK	0x0000ffff
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT	0
+static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK;
+}
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK	0xffff0000
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT	16
+static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
+{
+	return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK;
+}
+
+#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG				0x0000220a
+
+#define REG_A3XX_HLSQ_CL_NDRANGE_1_REG				0x0000220b
+
+#define REG_A3XX_HLSQ_CL_NDRANGE_2_REG				0x0000220c
+
+#define REG_A3XX_HLSQ_CL_CONTROL_0_REG				0x00002211
+
+#define REG_A3XX_HLSQ_CL_CONTROL_1_REG				0x00002212
+
+#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG			0x00002214
+
+#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG			0x00002215
+
+#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG			0x00002217
+
+#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG				0x0000221a
+
+#define REG_A3XX_VFD_CONTROL_0					0x00002240
+#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK			0x0003ffff
+#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT			0
+static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK;
+}
+#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK			0x003c0000
+#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT			18
+static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT) & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK;
+}
+#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK		0x07c00000
+#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT		22
+static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK;
+}
+#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK		0xf8000000
+#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT		27
+static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK;
+}
+
+#define REG_A3XX_VFD_CONTROL_1					0x00002241
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK			0x0000ffff
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT			0
+static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
+}
+#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK			0x00ff0000
+#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT			16
+static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A3XX_VFD_CONTROL_1_REGID4VTX__MASK;
+}
+#define A3XX_VFD_CONTROL_1_REGID4INST__MASK			0xff000000
+#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT			24
+static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val)
+{
+	return ((val) << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A3XX_VFD_CONTROL_1_REGID4INST__MASK;
+}
+
+#define REG_A3XX_VFD_INDEX_MIN					0x00002242
+
+#define REG_A3XX_VFD_INDEX_MAX					0x00002243
+
+#define REG_A3XX_VFD_INSTANCEID_OFFSET				0x00002244
+
+#define REG_A3XX_VFD_INDEX_OFFSET				0x00002245
+
+static inline uint32_t REG_A3XX_VFD_FETCH(uint32_t i0) { return 0x00002246 + 0x2*i0; }
+
+static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_0(uint32_t i0) { return 0x00002246 + 0x2*i0; }
+#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK			0x0000007f
+#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT			0
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val)
+{
+	return ((val) << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK			0x0001ff80
+#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT			7
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val)
+{
+	return ((val) << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT			0x00020000
+#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK			0x00fc0000
+#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT			18
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val)
+{
+	return ((val) << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK			0xff000000
+#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT			24
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val)
+{
+	return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK;
+}
+
+static inline uint32_t REG_A3XX_VFD_FETCH_INSTR_1(uint32_t i0) { return 0x00002247 + 0x2*i0; }
+
+static inline uint32_t REG_A3XX_VFD_DECODE(uint32_t i0) { return 0x00002266 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_VFD_DECODE_INSTR(uint32_t i0) { return 0x00002266 + 0x1*i0; }
+#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK			0x0000000f
+#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT			0
+static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val)
+{
+	return ((val) << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_CONSTFILL				0x00000010
+#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK			0x00000fc0
+#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT			6
+static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val)
+{
+	return ((val) << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A3XX_VFD_DECODE_INSTR_FORMAT__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_REGID__MASK			0x000ff000
+#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT			12
+static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val)
+{
+	return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK			0x1f000000
+#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT			24
+static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
+{
+	return ((val) << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID			0x20000000
+#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT			0x40000000
+
+#define REG_A3XX_VFD_VS_THREADING_THRESHOLD			0x0000227e
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK	0x0000000f
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT	0
+static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val)
+{
+	return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK;
+}
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK	0x0000ff00
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT	8
+static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val)
+{
+	return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK;
+}
+
+#define REG_A3XX_VPC_ATTR					0x00002280
+#define A3XX_VPC_ATTR_TOTALATTR__MASK				0x00000fff
+#define A3XX_VPC_ATTR_TOTALATTR__SHIFT				0
+static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val)
+{
+	return ((val) << A3XX_VPC_ATTR_TOTALATTR__SHIFT) & A3XX_VPC_ATTR_TOTALATTR__MASK;
+}
+#define A3XX_VPC_ATTR_THRDASSIGN__MASK				0x0ffff000
+#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT				12
+static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val)
+{
+	return ((val) << A3XX_VPC_ATTR_THRDASSIGN__SHIFT) & A3XX_VPC_ATTR_THRDASSIGN__MASK;
+}
+#define A3XX_VPC_ATTR_LMSIZE__MASK				0xf0000000
+#define A3XX_VPC_ATTR_LMSIZE__SHIFT				28
+static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val)
+{
+	return ((val) << A3XX_VPC_ATTR_LMSIZE__SHIFT) & A3XX_VPC_ATTR_LMSIZE__MASK;
+}
+
+#define REG_A3XX_VPC_PACK					0x00002281
+#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK			0x0000ff00
+#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT			8
+static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val)
+{
+	return ((val) << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK;
+}
+#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK			0x00ff0000
+#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT			16
+static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val)
+{
+	return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK;
+}
+
+static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL_MODE(uint32_t i0) { return 0x00002286 + 0x1*i0; }
+
+#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0			0x0000228a
+
+#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1			0x0000228b
+
+#define REG_A3XX_SP_SP_CTRL_REG					0x000022c0
+#define A3XX_SP_SP_CTRL_REG_RESOLVE				0x00010000
+#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK			0x000c0000
+#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT			18
+static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val)
+{
+	return ((val) << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK;
+}
+#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK			0x00300000
+#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT			20
+static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val)
+{
+	return ((val) << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK;
+}
+#define A3XX_SP_SP_CTRL_REG_LOMODE__MASK			0x00c00000
+#define A3XX_SP_SP_CTRL_REG_LOMODE__SHIFT			22
+static inline uint32_t A3XX_SP_SP_CTRL_REG_LOMODE(uint32_t val)
+{
+	return ((val) << A3XX_SP_SP_CTRL_REG_LOMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_LOMODE__MASK;
+}
+
+#define REG_A3XX_SP_VS_CTRL_REG0				0x000022c4
+#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK			0x00000001
+#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT			0
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK		0x00000002
+#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT		1
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID			0x00000004
+#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK		0x000003f0
+#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT		4
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK		0x0003fc00
+#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT		10
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK		0x000c0000
+#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT		18
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK			0x00100000
+#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT			20
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE			0x00200000
+#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE			0x00400000
+#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK			0xff000000
+#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT			24
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK;
+}
+
+#define REG_A3XX_SP_VS_CTRL_REG1				0x000022c5
+#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK			0x000003ff
+#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT			0
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK		0x000ffc00
+#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT		10
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK		0x3f000000
+#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT		24
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK;
+}
+
+#define REG_A3XX_SP_VS_PARAM_REG				0x000022c6
+#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK			0x000000ff
+#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT			0
+static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_POSREGID__MASK;
+}
+#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK			0x0000ff00
+#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT			8
+static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK;
+}
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK		0xfff00000
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT		20
+static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT) & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK;
+}
+
+static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
+#define A3XX_SP_VS_OUT_REG_A_REGID__MASK			0x000001ff
+#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT			0
+static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK			0x00001e00
+#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT			9
+static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_B_REGID__MASK			0x01ff0000
+#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT			16
+static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK			0x1e000000
+#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT			25
+static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK;
+}
+
+static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK			0x000000ff
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT			0
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK			0x0000ff00
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT			8
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK			0x00ff0000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT			16
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK			0xff000000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT			24
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
+#define REG_A3XX_SP_VS_OBJ_OFFSET_REG				0x000022d4
+#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK	0x01ff0000
+#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT	16
+static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
+}
+#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK		0xfe000000
+#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT	25
+static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
+}
+
+#define REG_A3XX_SP_VS_OBJ_START_REG				0x000022d5
+
+#define REG_A3XX_SP_VS_PVT_MEM_CTRL_REG				0x000022d6
+
+#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG				0x000022d7
+
+#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG				0x000022d8
+
+#define REG_A3XX_SP_VS_LENGTH_REG				0x000022df
+#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK		0xffffffff
+#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT		0
+static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val)
+{
+	return ((val) << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK;
+}
+
+#define REG_A3XX_SP_FS_CTRL_REG0				0x000022e0
+#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK			0x00000001
+#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT			0
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK		0x00000002
+#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT		1
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID			0x00000004
+#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK		0x000003f0
+#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT		4
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK		0x0003fc00
+#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT		10
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK		0x000c0000
+#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT		18
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK			0x00100000
+#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT			20
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE			0x00200000
+#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE			0x00400000
+#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK			0xff000000
+#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT			24
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK;
+}
+
+#define REG_A3XX_SP_FS_CTRL_REG1				0x000022e1
+#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK			0x000003ff
+#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT			0
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK		0x000ffc00
+#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT		10
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK		0x00f00000
+#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT		20
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK		0x3f000000
+#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT		24
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT) & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK;
+}
+
+#define REG_A3XX_SP_FS_OBJ_OFFSET_REG				0x000022e2
+#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK	0x01ff0000
+#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT	16
+static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
+}
+#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK		0xfe000000
+#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT	25
+static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
+}
+
+#define REG_A3XX_SP_FS_OBJ_START_REG				0x000022e3
+
+#define REG_A3XX_SP_FS_PVT_MEM_CTRL_REG				0x000022e4
+
+#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG				0x000022e5
+
+#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG				0x000022e6
+
+#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0			0x000022e8
+
+#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1			0x000022e9
+
+#define REG_A3XX_SP_FS_OUTPUT_REG				0x000022ec
+
+static inline uint32_t REG_A3XX_SP_FS_MRT(uint32_t i0) { return 0x000022f0 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_SP_FS_MRT_REG(uint32_t i0) { return 0x000022f0 + 0x1*i0; }
+#define A3XX_SP_FS_MRT_REG_REGID__MASK				0x000000ff
+#define A3XX_SP_FS_MRT_REG_REGID__SHIFT				0
+static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_MRT_REG_REGID__SHIFT) & A3XX_SP_FS_MRT_REG_REGID__MASK;
+}
+#define A3XX_SP_FS_MRT_REG_HALF_PRECISION			0x00000100
+
+static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT(uint32_t i0) { return 0x000022f4 + 0x1*i0; }
+
+static inline uint32_t REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(uint32_t i0) { return 0x000022f4 + 0x1*i0; }
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK		0x0000003f
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT		0
+static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val)
+{
+	return ((val) << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT) & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK;
+}
+
+#define REG_A3XX_SP_FS_LENGTH_REG				0x000022ff
+#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK		0xffffffff
+#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT		0
+static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT) & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET				0x00002340
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK		0x000000ff
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT		0
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val)
+{
+	return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK;
+}
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK		0x0000ff00
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT		8
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK;
+}
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK		0xffff0000
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT		16
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
+{
+	return ((val) << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR		0x00002341
+
+#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET				0x00002342
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK		0x000000ff
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT		0
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val)
+{
+	return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK;
+}
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK		0x0000ff00
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT		8
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val)
+{
+	return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK;
+}
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK		0xffff0000
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT		16
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
+{
+	return ((val) << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT) & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR		0x00002343
+
+#define REG_A3XX_VBIF_CLKON					0x00003001
+
+#define REG_A3XX_VBIF_FIXED_SORT_EN				0x0000300c
+
+#define REG_A3XX_VBIF_FIXED_SORT_SEL0				0x0000300d
+
+#define REG_A3XX_VBIF_FIXED_SORT_SEL1				0x0000300e
+
+#define REG_A3XX_VBIF_ABIT_SORT					0x0000301c
+
+#define REG_A3XX_VBIF_ABIT_SORT_CONF				0x0000301d
+
+#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN				0x0000302a
+
+#define REG_A3XX_VBIF_IN_RD_LIM_CONF0				0x0000302c
+
+#define REG_A3XX_VBIF_IN_RD_LIM_CONF1				0x0000302d
+
+#define REG_A3XX_VBIF_IN_WR_LIM_CONF0				0x00003030
+
+#define REG_A3XX_VBIF_IN_WR_LIM_CONF1				0x00003031
+
+#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0				0x00003034
+
+#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0				0x00003035
+
+#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST				0x00003036
+
+#define REG_A3XX_VBIF_ARB_CTL					0x0000303c
+
+#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB			0x00003049
+
+#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0			0x00003058
+
+#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN				0x0000305e
+
+#define REG_A3XX_VBIF_OUT_AXI_AOOO				0x0000305f
+
+#define REG_A3XX_VSC_BIN_SIZE					0x00000c01
+#define A3XX_VSC_BIN_SIZE_WIDTH__MASK				0x0000001f
+#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT				0
+static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val)
+{
+	return ((val >> 5) << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT) & A3XX_VSC_BIN_SIZE_WIDTH__MASK;
+}
+#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK				0x000003e0
+#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT				5
+static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val >> 5) << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT) & A3XX_VSC_BIN_SIZE_HEIGHT__MASK;
+}
+
+#define REG_A3XX_VSC_SIZE_ADDRESS				0x00000c02
+
+static inline uint32_t REG_A3XX_VSC_PIPE(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
+
+static inline uint32_t REG_A3XX_VSC_PIPE_CONFIG(uint32_t i0) { return 0x00000c06 + 0x3*i0; }
+#define A3XX_VSC_PIPE_CONFIG_X__MASK				0x000003ff
+#define A3XX_VSC_PIPE_CONFIG_X__SHIFT				0
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val)
+{
+	return ((val) << A3XX_VSC_PIPE_CONFIG_X__SHIFT) & A3XX_VSC_PIPE_CONFIG_X__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_Y__MASK				0x000ffc00
+#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT				10
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val)
+{
+	return ((val) << A3XX_VSC_PIPE_CONFIG_Y__SHIFT) & A3XX_VSC_PIPE_CONFIG_Y__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_W__MASK				0x00f00000
+#define A3XX_VSC_PIPE_CONFIG_W__SHIFT				20
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val)
+{
+	return ((val) << A3XX_VSC_PIPE_CONFIG_W__SHIFT) & A3XX_VSC_PIPE_CONFIG_W__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_H__MASK				0x0f000000
+#define A3XX_VSC_PIPE_CONFIG_H__SHIFT				24
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val)
+{
+	return ((val) << A3XX_VSC_PIPE_CONFIG_H__SHIFT) & A3XX_VSC_PIPE_CONFIG_H__MASK;
+}
+
+static inline uint32_t REG_A3XX_VSC_PIPE_DATA_ADDRESS(uint32_t i0) { return 0x00000c07 + 0x3*i0; }
+
+static inline uint32_t REG_A3XX_VSC_PIPE_DATA_LENGTH(uint32_t i0) { return 0x00000c08 + 0x3*i0; }
+
+#define REG_A3XX_UNKNOWN_0C3D					0x00000c3d
+
+#define REG_A3XX_PC_PERFCOUNTER0_SELECT				0x00000c48
+
+#define REG_A3XX_PC_PERFCOUNTER1_SELECT				0x00000c49
+
+#define REG_A3XX_PC_PERFCOUNTER2_SELECT				0x00000c4a
+
+#define REG_A3XX_PC_PERFCOUNTER3_SELECT				0x00000c4b
+
+#define REG_A3XX_UNKNOWN_0C81					0x00000c81
+
+#define REG_A3XX_GRAS_PERFCOUNTER0_SELECT			0x00000c88
+
+#define REG_A3XX_GRAS_PERFCOUNTER1_SELECT			0x00000c89
+
+#define REG_A3XX_GRAS_PERFCOUNTER2_SELECT			0x00000c8a
+
+#define REG_A3XX_GRAS_PERFCOUNTER3_SELECT			0x00000c8b
+
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE(uint32_t i0) { return 0x00000ca0 + 0x4*i0; }
+
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_X(uint32_t i0) { return 0x00000ca0 + 0x4*i0; }
+
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Y(uint32_t i0) { return 0x00000ca1 + 0x4*i0; }
+
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_Z(uint32_t i0) { return 0x00000ca2 + 0x4*i0; }
+
+static inline uint32_t REG_A3XX_GRAS_CL_USER_PLANE_W(uint32_t i0) { return 0x00000ca3 + 0x4*i0; }
+
+#define REG_A3XX_RB_GMEM_BASE_ADDR				0x00000cc0
+
+#define REG_A3XX_RB_PERFCOUNTER0_SELECT				0x00000cc6
+
+#define REG_A3XX_RB_PERFCOUNTER1_SELECT				0x00000cc7
+
+#define REG_A3XX_RB_WINDOW_SIZE					0x00000ce0
+#define A3XX_RB_WINDOW_SIZE_WIDTH__MASK				0x00003fff
+#define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT			0
+static inline uint32_t A3XX_RB_WINDOW_SIZE_WIDTH(uint32_t val)
+{
+	return ((val) << A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT) & A3XX_RB_WINDOW_SIZE_WIDTH__MASK;
+}
+#define A3XX_RB_WINDOW_SIZE_HEIGHT__MASK			0x0fffc000
+#define A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT			14
+static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val) << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT) & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK;
+}
+
+#define REG_A3XX_HLSQ_PERFCOUNTER0_SELECT			0x00000e00
+
+#define REG_A3XX_HLSQ_PERFCOUNTER1_SELECT			0x00000e01
+
+#define REG_A3XX_HLSQ_PERFCOUNTER2_SELECT			0x00000e02
+
+#define REG_A3XX_HLSQ_PERFCOUNTER3_SELECT			0x00000e03
+
+#define REG_A3XX_HLSQ_PERFCOUNTER4_SELECT			0x00000e04
+
+#define REG_A3XX_HLSQ_PERFCOUNTER5_SELECT			0x00000e05
+
+#define REG_A3XX_UNKNOWN_0E43					0x00000e43
+
+#define REG_A3XX_VFD_PERFCOUNTER0_SELECT			0x00000e44
+
+#define REG_A3XX_VFD_PERFCOUNTER1_SELECT			0x00000e45
+
+#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL				0x00000e61
+
+#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ				0x00000e62
+
+#define REG_A3XX_VPC_PERFCOUNTER0_SELECT			0x00000e64
+
+#define REG_A3XX_VPC_PERFCOUNTER1_SELECT			0x00000e65
+
+#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG			0x00000e82
+
+#define REG_A3XX_UCHE_PERFCOUNTER0_SELECT			0x00000e84
+
+#define REG_A3XX_UCHE_PERFCOUNTER1_SELECT			0x00000e85
+
+#define REG_A3XX_UCHE_PERFCOUNTER2_SELECT			0x00000e86
+
+#define REG_A3XX_UCHE_PERFCOUNTER3_SELECT			0x00000e87
+
+#define REG_A3XX_UCHE_PERFCOUNTER4_SELECT			0x00000e88
+
+#define REG_A3XX_UCHE_PERFCOUNTER5_SELECT			0x00000e89
+
+#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG			0x00000ea0
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK		0x0fffffff
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT		0
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val)
+{
+	return ((val) << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK;
+}
+
+#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG			0x00000ea1
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK		0x0fffffff
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT		0
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val)
+{
+	return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK;
+}
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK		0x30000000
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT		28
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val)
+{
+	return ((val) << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT) & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK;
+}
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE		0x80000000
+
+#define REG_A3XX_SP_PERFCOUNTER0_SELECT				0x00000ec4
+
+#define REG_A3XX_SP_PERFCOUNTER1_SELECT				0x00000ec5
+
+#define REG_A3XX_SP_PERFCOUNTER2_SELECT				0x00000ec6
+
+#define REG_A3XX_SP_PERFCOUNTER3_SELECT				0x00000ec7
+
+#define REG_A3XX_SP_PERFCOUNTER4_SELECT				0x00000ec8
+
+#define REG_A3XX_SP_PERFCOUNTER5_SELECT				0x00000ec9
+
+#define REG_A3XX_SP_PERFCOUNTER6_SELECT				0x00000eca
+
+#define REG_A3XX_SP_PERFCOUNTER7_SELECT				0x00000ecb
+
+#define REG_A3XX_UNKNOWN_0EE0					0x00000ee0
+
+#define REG_A3XX_UNKNOWN_0F03					0x00000f03
+
+#define REG_A3XX_TP_PERFCOUNTER0_SELECT				0x00000f04
+
+#define REG_A3XX_TP_PERFCOUNTER1_SELECT				0x00000f05
+
+#define REG_A3XX_TP_PERFCOUNTER2_SELECT				0x00000f06
+
+#define REG_A3XX_TP_PERFCOUNTER3_SELECT				0x00000f07
+
+#define REG_A3XX_TP_PERFCOUNTER4_SELECT				0x00000f08
+
+#define REG_A3XX_TP_PERFCOUNTER5_SELECT				0x00000f09
+
+#define REG_A3XX_TEX_SAMP_0					0x00000000
+#define A3XX_TEX_SAMP_0_XY_MAG__MASK				0x0000000c
+#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT				2
+static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val)
+{
+	return ((val) << A3XX_TEX_SAMP_0_XY_MAG__SHIFT) & A3XX_TEX_SAMP_0_XY_MAG__MASK;
+}
+#define A3XX_TEX_SAMP_0_XY_MIN__MASK				0x00000030
+#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT				4
+static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val)
+{
+	return ((val) << A3XX_TEX_SAMP_0_XY_MIN__SHIFT) & A3XX_TEX_SAMP_0_XY_MIN__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_S__MASK				0x000001c0
+#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT				6
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val)
+{
+	return ((val) << A3XX_TEX_SAMP_0_WRAP_S__SHIFT) & A3XX_TEX_SAMP_0_WRAP_S__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_T__MASK				0x00000e00
+#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT				9
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val)
+{
+	return ((val) << A3XX_TEX_SAMP_0_WRAP_T__SHIFT) & A3XX_TEX_SAMP_0_WRAP_T__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_R__MASK				0x00007000
+#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT				12
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val)
+{
+	return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK;
+}
+#define A3XX_TEX_SAMP_0_UNNORM_COORDS				0x80000000
+
+#define REG_A3XX_TEX_SAMP_1					0x00000001
+
+#define REG_A3XX_TEX_CONST_0					0x00000000
+#define A3XX_TEX_CONST_0_TILED					0x00000001
+#define A3XX_TEX_CONST_0_SWIZ_X__MASK				0x00000070
+#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT				4
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val)
+{
+	return ((val) << A3XX_TEX_CONST_0_SWIZ_X__SHIFT) & A3XX_TEX_CONST_0_SWIZ_X__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_Y__MASK				0x00000380
+#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT				7
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val)
+{
+	return ((val) << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Y__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_Z__MASK				0x00001c00
+#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT				10
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val)
+{
+	return ((val) << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT) & A3XX_TEX_CONST_0_SWIZ_Z__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_W__MASK				0x0000e000
+#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT				13
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val)
+{
+	return ((val) << A3XX_TEX_CONST_0_SWIZ_W__SHIFT) & A3XX_TEX_CONST_0_SWIZ_W__MASK;
+}
+#define A3XX_TEX_CONST_0_FMT__MASK				0x1fc00000
+#define A3XX_TEX_CONST_0_FMT__SHIFT				22
+static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
+{
+	return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK;
+}
+#define A3XX_TEX_CONST_0_TYPE__MASK				0xc0000000
+#define A3XX_TEX_CONST_0_TYPE__SHIFT				30
+static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val)
+{
+	return ((val) << A3XX_TEX_CONST_0_TYPE__SHIFT) & A3XX_TEX_CONST_0_TYPE__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_1					0x00000001
+#define A3XX_TEX_CONST_1_HEIGHT__MASK				0x00003fff
+#define A3XX_TEX_CONST_1_HEIGHT__SHIFT				0
+static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val)
+{
+	return ((val) << A3XX_TEX_CONST_1_HEIGHT__SHIFT) & A3XX_TEX_CONST_1_HEIGHT__MASK;
+}
+#define A3XX_TEX_CONST_1_WIDTH__MASK				0x0fffc000
+#define A3XX_TEX_CONST_1_WIDTH__SHIFT				14
+static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val)
+{
+	return ((val) << A3XX_TEX_CONST_1_WIDTH__SHIFT) & A3XX_TEX_CONST_1_WIDTH__MASK;
+}
+#define A3XX_TEX_CONST_1_FETCHSIZE__MASK			0xf0000000
+#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT			28
+static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val)
+{
+	return ((val) << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT) & A3XX_TEX_CONST_1_FETCHSIZE__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_2					0x00000002
+#define A3XX_TEX_CONST_2_INDX__MASK				0x000000ff
+#define A3XX_TEX_CONST_2_INDX__SHIFT				0
+static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val)
+{
+	return ((val) << A3XX_TEX_CONST_2_INDX__SHIFT) & A3XX_TEX_CONST_2_INDX__MASK;
+}
+#define A3XX_TEX_CONST_2_PITCH__MASK				0x3ffff000
+#define A3XX_TEX_CONST_2_PITCH__SHIFT				12
+static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val)
+{
+	return ((val) << A3XX_TEX_CONST_2_PITCH__SHIFT) & A3XX_TEX_CONST_2_PITCH__MASK;
+}
+#define A3XX_TEX_CONST_2_SWAP__MASK				0xc0000000
+#define A3XX_TEX_CONST_2_SWAP__SHIFT				30
+static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val)
+{
+	return ((val) << A3XX_TEX_CONST_2_SWAP__SHIFT) & A3XX_TEX_CONST_2_SWAP__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_3					0x00000003
+
+
+#endif /* A3XX_XML */
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
new file mode 100644
index 0000000..035bd13
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -0,0 +1,502 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "a3xx_gpu.h"
+
+#define A3XX_INT0_MASK \
+	(A3XX_INT0_RBBM_AHB_ERROR |        \
+	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
+	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
+	 A3XX_INT0_CP_OPCODE_ERROR |       \
+	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
+	 A3XX_INT0_CP_HW_FAULT |           \
+	 A3XX_INT0_CP_IB1_INT |            \
+	 A3XX_INT0_CP_IB2_INT |            \
+	 A3XX_INT0_CP_RB_INT |             \
+	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
+	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
+	 A3XX_INT0_UCHE_OOB_ACCESS)
+
+static struct platform_device *a3xx_pdev;
+
+static void a3xx_me_init(struct msm_gpu *gpu)
+{
+	struct msm_ringbuffer *ring = gpu->rb;
+
+	OUT_PKT3(ring, CP_ME_INIT, 17);
+	OUT_RING(ring, 0x000003f7);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000080);
+	OUT_RING(ring, 0x00000100);
+	OUT_RING(ring, 0x00000180);
+	OUT_RING(ring, 0x00006600);
+	OUT_RING(ring, 0x00000150);
+	OUT_RING(ring, 0x0000014e);
+	OUT_RING(ring, 0x00000154);
+	OUT_RING(ring, 0x00000001);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000000);
+
+	gpu->funcs->flush(gpu);
+	gpu->funcs->idle(gpu);
+}
+
+static int a3xx_hw_init(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	uint32_t *ptr, len;
+	int i, ret;
+
+	DBG("%s", gpu->name);
+
+	if (adreno_is_a305(adreno_gpu)) {
+		/* Set up 16 deep read/write request queues: */
+		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
+		/* Enable WR-REQ: */
+		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
+		/* Set up round robin arbitration between both AXI ports: */
+		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
+		/* Set up AOOO: */
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
+
+	} else if (adreno_is_a320(adreno_gpu)) {
+		/* Set up 16 deep read/write request queues: */
+		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
+		/* Enable WR-REQ: */
+		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
+		/* Set up round robin arbitration between both AXI ports: */
+		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
+		/* Set up AOOO: */
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
+		/* Enable 1K sort: */
+		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
+		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
+
+	} else if (adreno_is_a330(adreno_gpu)) {
+		/* Set up 16 deep read/write request queues: */
+		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
+		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
+		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
+		/* Enable WR-REQ: */
+		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
+		/* Set up round robin arbitration between both AXI ports: */
+		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
+		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
+		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
+		/* Set up AOOO: */
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000ffff);
+		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0xffffffff);
+		/* Enable 1K sort: */
+		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001ffff);
+		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
+		/* Disable VBIF clock gating. This is to enable AXI running
+		 * higher frequency than GPU:
+		 */
+		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
+
+	} else {
+		BUG();
+	}
+
+	/* Make all blocks contribute to the GPU BUSY perf counter: */
+	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
+
+	/* Tune the hystersis counters for SP and CP idle detection: */
+	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
+	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
+
+	/* Enable the RBBM error reporting bits.  This lets us get
+	 * useful information on failure:
+	 */
+	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
+
+	/* Enable AHB error reporting: */
+	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
+
+	/* Turn on the power counters: */
+	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
+
+	/* Turn on hang detection - this spews a lot of useful information
+	 * into the RBBM registers on a hang:
+	 */
+	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
+
+	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
+	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
+
+	/* Enable Clock gating: */
+	gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
+
+	/* Set the OCMEM base address for A330 */
+//TODO:
+//	if (adreno_is_a330(adreno_gpu)) {
+//		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
+//			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
+//	}
+
+	/* Turn on performance counters: */
+	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
+
+	/* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS
+	 * we will use this to augment our hang detection:
+	 */
+	gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT,
+			SP_FS_FULL_ALU_INSTRUCTIONS);
+
+	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
+
+	ret = adreno_hw_init(gpu);
+	if (ret)
+		return ret;
+
+	/* setup access protection: */
+	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
+
+	/* RBBM registers */
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
+
+	/* CP registers */
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
+
+	/* RB registers */
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
+
+	/* VBIF registers */
+	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
+
+	/* NOTE: PM4/micro-engine firmware registers look to be the same
+	 * for a2xx and a3xx.. we could possibly push that part down to
+	 * adreno_gpu base class.  Or push both PM4 and PFP but
+	 * parameterize the pfp ucode addr/data registers..
+	 */
+
+	/* Load PM4: */
+	ptr = (uint32_t *)(adreno_gpu->pm4->data);
+	len = adreno_gpu->pm4->size / 4;
+	DBG("loading PM4 ucode version: %u", ptr[0]);
+
+	gpu_write(gpu, REG_AXXX_CP_DEBUG,
+			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
+			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
+	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
+	for (i = 1; i < len; i++)
+		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
+
+	/* Load PFP: */
+	ptr = (uint32_t *)(adreno_gpu->pfp->data);
+	len = adreno_gpu->pfp->size / 4;
+	DBG("loading PFP ucode version: %u", ptr[0]);
+
+	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
+	for (i = 1; i < len; i++)
+		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
+
+	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
+	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu))
+		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
+				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
+				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
+				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
+
+
+	/* clear ME_HALT to start micro engine */
+	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
+
+	a3xx_me_init(gpu);
+
+	return 0;
+}
+
+static void a3xx_destroy(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
+
+	DBG("%s", gpu->name);
+
+	adreno_gpu_cleanup(adreno_gpu);
+	put_device(&a3xx_gpu->pdev->dev);
+	kfree(a3xx_gpu);
+}
+
+static void a3xx_idle(struct msm_gpu *gpu)
+{
+	unsigned long t;
+
+	/* wait for ringbuffer to drain: */
+	adreno_idle(gpu);
+
+	t = jiffies + ADRENO_IDLE_TIMEOUT;
+
+	/* then wait for GPU to finish: */
+	do {
+		uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
+		if (!(rbbm_status & A3XX_RBBM_STATUS_GPU_BUSY))
+			return;
+	} while(time_before(jiffies, t));
+
+	DRM_ERROR("timeout waiting for %s to idle!\n", gpu->name);
+
+	/* TODO maybe we need to reset GPU here to recover from hang? */
+}
+
+static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
+{
+	uint32_t status;
+
+	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
+	DBG("%s: %08x", gpu->name, status);
+
+	// TODO
+
+	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
+
+	msm_gpu_retire(gpu);
+
+	return IRQ_HANDLED;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static const unsigned int a3xx_registers[] = {
+	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
+	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
+	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
+	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
+	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
+	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
+	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
+	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
+	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
+	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
+	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
+	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
+	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
+	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
+	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
+	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
+	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
+	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
+	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
+	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
+	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
+	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
+	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
+	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
+	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
+	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
+	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
+	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
+	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
+	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
+	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
+	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
+	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
+	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
+	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
+	0x303c, 0x303c, 0x305e, 0x305f,
+};
+
+static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
+{
+	int i;
+
+	adreno_show(gpu, m);
+	seq_printf(m, "status:   %08x\n",
+			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
+
+	/* dump these out in a form that can be parsed by demsm: */
+	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
+	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
+		uint32_t start = a3xx_registers[i];
+		uint32_t end   = a3xx_registers[i+1];
+		uint32_t addr;
+
+		for (addr = start; addr <= end; addr++) {
+			uint32_t val = gpu_read(gpu, addr);
+			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
+		}
+	}
+}
+#endif
+
+static const struct adreno_gpu_funcs funcs = {
+	.base = {
+		.get_param = adreno_get_param,
+		.hw_init = a3xx_hw_init,
+		.pm_suspend = msm_gpu_pm_suspend,
+		.pm_resume = msm_gpu_pm_resume,
+		.recover = adreno_recover,
+		.last_fence = adreno_last_fence,
+		.submit = adreno_submit,
+		.flush = adreno_flush,
+		.idle = a3xx_idle,
+		.irq = a3xx_irq,
+		.destroy = a3xx_destroy,
+#ifdef CONFIG_DEBUG_FS
+		.show = a3xx_show,
+#endif
+	},
+};
+
+struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
+{
+	struct a3xx_gpu *a3xx_gpu = NULL;
+	struct msm_gpu *gpu;
+	struct platform_device *pdev = a3xx_pdev;
+	struct adreno_platform_config *config;
+	int ret;
+
+	if (!pdev) {
+		dev_err(dev->dev, "no a3xx device\n");
+		ret = -ENXIO;
+		goto fail;
+	}
+
+	config = pdev->dev.platform_data;
+
+	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
+	if (!a3xx_gpu) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	gpu = &a3xx_gpu->base.base;
+
+	get_device(&pdev->dev);
+	a3xx_gpu->pdev = pdev;
+
+	gpu->fast_rate = config->fast_rate;
+	gpu->slow_rate = config->slow_rate;
+	gpu->bus_freq  = config->bus_freq;
+
+	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
+			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
+
+	ret = adreno_gpu_init(dev, pdev, &a3xx_gpu->base,
+			&funcs, config->rev);
+	if (ret)
+		goto fail;
+
+	return &a3xx_gpu->base.base;
+
+fail:
+	if (a3xx_gpu)
+		a3xx_destroy(&a3xx_gpu->base.base);
+
+	return ERR_PTR(ret);
+}
+
+/*
+ * The a3xx device:
+ */
+
+static int a3xx_probe(struct platform_device *pdev)
+{
+	static struct adreno_platform_config config = {};
+#ifdef CONFIG_OF
+	/* TODO */
+#else
+	uint32_t version = socinfo_get_version();
+	if (cpu_is_apq8064ab()) {
+		config.fast_rate = 450000000;
+		config.slow_rate = 27000000;
+		config.bus_freq  = 4;
+		config.rev = ADRENO_REV(3, 2, 1, 0);
+	} else if (cpu_is_apq8064() || cpu_is_msm8960ab()) {
+		config.fast_rate = 400000000;
+		config.slow_rate = 27000000;
+		config.bus_freq  = 4;
+
+		if (SOCINFO_VERSION_MAJOR(version) == 2)
+			config.rev = ADRENO_REV(3, 2, 0, 2);
+		else if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
+				(SOCINFO_VERSION_MINOR(version) == 1))
+			config.rev = ADRENO_REV(3, 2, 0, 1);
+		else
+			config.rev = ADRENO_REV(3, 2, 0, 0);
+
+	} else if (cpu_is_msm8930()) {
+		config.fast_rate = 400000000;
+		config.slow_rate = 27000000;
+		config.bus_freq  = 3;
+
+		if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
+			(SOCINFO_VERSION_MINOR(version) == 2))
+			config.rev = ADRENO_REV(3, 0, 5, 2);
+		else
+			config.rev = ADRENO_REV(3, 0, 5, 0);
+
+	}
+#endif
+	pdev->dev.platform_data = &config;
+	a3xx_pdev = pdev;
+	return 0;
+}
+
+static int a3xx_remove(struct platform_device *pdev)
+{
+	a3xx_pdev = NULL;
+	return 0;
+}
+
+static struct platform_driver a3xx_driver = {
+	.probe = a3xx_probe,
+	.remove = a3xx_remove,
+	.driver.name = "kgsl-3d0",
+};
+
+void __init a3xx_register(void)
+{
+	platform_driver_register(&a3xx_driver);
+}
+
+void __exit a3xx_unregister(void)
+{
+	platform_driver_unregister(&a3xx_driver);
+}
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.h b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h
new file mode 100644
index 0000000..32c398c
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __A3XX_GPU_H__
+#define __A3XX_GPU_H__
+
+#include "adreno_gpu.h"
+#include "a3xx.xml.h"
+
+struct a3xx_gpu {
+	struct adreno_gpu base;
+	struct platform_device *pdev;
+};
+#define to_a3xx_gpu(x) container_of(x, struct a3xx_gpu, base)
+
+#endif /* __A3XX_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h
new file mode 100644
index 0000000..61979d4
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h
@@ -0,0 +1,432 @@
+#ifndef ADRENO_COMMON_XML
+#define ADRENO_COMMON_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml              (    327 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml           (  30005 bytes, from 2013-07-19 21:30:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   8983 bytes, from 2013-07-24 01:38:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml           (  51415 bytes, from 2013-08-03 14:26:05)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum adreno_pa_su_sc_draw {
+	PC_DRAW_POINTS = 0,
+	PC_DRAW_LINES = 1,
+	PC_DRAW_TRIANGLES = 2,
+};
+
+enum adreno_compare_func {
+	FUNC_NEVER = 0,
+	FUNC_LESS = 1,
+	FUNC_EQUAL = 2,
+	FUNC_LEQUAL = 3,
+	FUNC_GREATER = 4,
+	FUNC_NOTEQUAL = 5,
+	FUNC_GEQUAL = 6,
+	FUNC_ALWAYS = 7,
+};
+
+enum adreno_stencil_op {
+	STENCIL_KEEP = 0,
+	STENCIL_ZERO = 1,
+	STENCIL_REPLACE = 2,
+	STENCIL_INCR_CLAMP = 3,
+	STENCIL_DECR_CLAMP = 4,
+	STENCIL_INVERT = 5,
+	STENCIL_INCR_WRAP = 6,
+	STENCIL_DECR_WRAP = 7,
+};
+
+enum adreno_rb_blend_factor {
+	FACTOR_ZERO = 0,
+	FACTOR_ONE = 1,
+	FACTOR_SRC_COLOR = 4,
+	FACTOR_ONE_MINUS_SRC_COLOR = 5,
+	FACTOR_SRC_ALPHA = 6,
+	FACTOR_ONE_MINUS_SRC_ALPHA = 7,
+	FACTOR_DST_COLOR = 8,
+	FACTOR_ONE_MINUS_DST_COLOR = 9,
+	FACTOR_DST_ALPHA = 10,
+	FACTOR_ONE_MINUS_DST_ALPHA = 11,
+	FACTOR_CONSTANT_COLOR = 12,
+	FACTOR_ONE_MINUS_CONSTANT_COLOR = 13,
+	FACTOR_CONSTANT_ALPHA = 14,
+	FACTOR_ONE_MINUS_CONSTANT_ALPHA = 15,
+	FACTOR_SRC_ALPHA_SATURATE = 16,
+};
+
+enum adreno_rb_blend_opcode {
+	BLEND_DST_PLUS_SRC = 0,
+	BLEND_SRC_MINUS_DST = 1,
+	BLEND_MIN_DST_SRC = 2,
+	BLEND_MAX_DST_SRC = 3,
+	BLEND_DST_MINUS_SRC = 4,
+	BLEND_DST_PLUS_SRC_BIAS = 5,
+};
+
+enum adreno_rb_surface_endian {
+	ENDIAN_NONE = 0,
+	ENDIAN_8IN16 = 1,
+	ENDIAN_8IN32 = 2,
+	ENDIAN_16IN32 = 3,
+	ENDIAN_8IN64 = 4,
+	ENDIAN_8IN128 = 5,
+};
+
+enum adreno_rb_dither_mode {
+	DITHER_DISABLE = 0,
+	DITHER_ALWAYS = 1,
+	DITHER_IF_ALPHA_OFF = 2,
+};
+
+enum adreno_rb_depth_format {
+	DEPTHX_16 = 0,
+	DEPTHX_24_8 = 1,
+};
+
+enum adreno_mmu_clnt_beh {
+	BEH_NEVR = 0,
+	BEH_TRAN_RNG = 1,
+	BEH_TRAN_FLT = 2,
+};
+
+#define REG_AXXX_MH_MMU_CONFIG					0x00000040
+#define AXXX_MH_MMU_CONFIG_MMU_ENABLE				0x00000001
+#define AXXX_MH_MMU_CONFIG_SPLIT_MODE_ENABLE			0x00000002
+#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK		0x00000030
+#define AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT		4
+static inline uint32_t AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_RB_W_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK		0x000000c0
+#define AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT		6
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_W_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK		0x00000300
+#define AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT		8
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R0_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK		0x00000c00
+#define AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT		10
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R1_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK		0x00003000
+#define AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT		12
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R2_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK		0x0000c000
+#define AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT		14
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R3_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK		0x00030000
+#define AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT		16
+static inline uint32_t AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_CP_R4_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK		0x000c0000
+#define AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT		18
+static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R0_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK		0x00300000
+#define AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT		20
+static inline uint32_t AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_VGT_R1_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK		0x00c00000
+#define AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT		22
+static inline uint32_t AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_TC_R_CLNT_BEHAVIOR__MASK;
+}
+#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK		0x03000000
+#define AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT		24
+static inline uint32_t AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR(enum adreno_mmu_clnt_beh val)
+{
+	return ((val) << AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__SHIFT) & AXXX_MH_MMU_CONFIG_PA_W_CLNT_BEHAVIOR__MASK;
+}
+
+#define REG_AXXX_MH_MMU_VA_RANGE				0x00000041
+
+#define REG_AXXX_MH_MMU_PT_BASE					0x00000042
+
+#define REG_AXXX_MH_MMU_PAGE_FAULT				0x00000043
+
+#define REG_AXXX_MH_MMU_TRAN_ERROR				0x00000044
+
+#define REG_AXXX_MH_MMU_INVALIDATE				0x00000045
+
+#define REG_AXXX_MH_MMU_MPU_BASE				0x00000046
+
+#define REG_AXXX_MH_MMU_MPU_END					0x00000047
+
+#define REG_AXXX_CP_RB_BASE					0x000001c0
+
+#define REG_AXXX_CP_RB_CNTL					0x000001c1
+#define AXXX_CP_RB_CNTL_BUFSZ__MASK				0x0000003f
+#define AXXX_CP_RB_CNTL_BUFSZ__SHIFT				0
+static inline uint32_t AXXX_CP_RB_CNTL_BUFSZ(uint32_t val)
+{
+	return ((val) << AXXX_CP_RB_CNTL_BUFSZ__SHIFT) & AXXX_CP_RB_CNTL_BUFSZ__MASK;
+}
+#define AXXX_CP_RB_CNTL_BLKSZ__MASK				0x00003f00
+#define AXXX_CP_RB_CNTL_BLKSZ__SHIFT				8
+static inline uint32_t AXXX_CP_RB_CNTL_BLKSZ(uint32_t val)
+{
+	return ((val) << AXXX_CP_RB_CNTL_BLKSZ__SHIFT) & AXXX_CP_RB_CNTL_BLKSZ__MASK;
+}
+#define AXXX_CP_RB_CNTL_BUF_SWAP__MASK				0x00030000
+#define AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT				16
+static inline uint32_t AXXX_CP_RB_CNTL_BUF_SWAP(uint32_t val)
+{
+	return ((val) << AXXX_CP_RB_CNTL_BUF_SWAP__SHIFT) & AXXX_CP_RB_CNTL_BUF_SWAP__MASK;
+}
+#define AXXX_CP_RB_CNTL_POLL_EN					0x00100000
+#define AXXX_CP_RB_CNTL_NO_UPDATE				0x08000000
+#define AXXX_CP_RB_CNTL_RPTR_WR_EN				0x80000000
+
+#define REG_AXXX_CP_RB_RPTR_ADDR				0x000001c3
+#define AXXX_CP_RB_RPTR_ADDR_SWAP__MASK				0x00000003
+#define AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT			0
+static inline uint32_t AXXX_CP_RB_RPTR_ADDR_SWAP(uint32_t val)
+{
+	return ((val) << AXXX_CP_RB_RPTR_ADDR_SWAP__SHIFT) & AXXX_CP_RB_RPTR_ADDR_SWAP__MASK;
+}
+#define AXXX_CP_RB_RPTR_ADDR_ADDR__MASK				0xfffffffc
+#define AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT			2
+static inline uint32_t AXXX_CP_RB_RPTR_ADDR_ADDR(uint32_t val)
+{
+	return ((val >> 2) << AXXX_CP_RB_RPTR_ADDR_ADDR__SHIFT) & AXXX_CP_RB_RPTR_ADDR_ADDR__MASK;
+}
+
+#define REG_AXXX_CP_RB_RPTR					0x000001c4
+
+#define REG_AXXX_CP_RB_WPTR					0x000001c5
+
+#define REG_AXXX_CP_RB_WPTR_DELAY				0x000001c6
+
+#define REG_AXXX_CP_RB_RPTR_WR					0x000001c7
+
+#define REG_AXXX_CP_RB_WPTR_BASE				0x000001c8
+
+#define REG_AXXX_CP_QUEUE_THRESHOLDS				0x000001d5
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK		0x0000000f
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT		0
+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(uint32_t val)
+{
+	return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START__MASK;
+}
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK		0x00000f00
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT		8
+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(uint32_t val)
+{
+	return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START__MASK;
+}
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK		0x000f0000
+#define AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT		16
+static inline uint32_t AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(uint32_t val)
+{
+	return ((val) << AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__SHIFT) & AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START__MASK;
+}
+
+#define REG_AXXX_CP_MEQ_THRESHOLDS				0x000001d6
+
+#define REG_AXXX_CP_CSQ_AVAIL					0x000001d7
+#define AXXX_CP_CSQ_AVAIL_RING__MASK				0x0000007f
+#define AXXX_CP_CSQ_AVAIL_RING__SHIFT				0
+static inline uint32_t AXXX_CP_CSQ_AVAIL_RING(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_AVAIL_RING__SHIFT) & AXXX_CP_CSQ_AVAIL_RING__MASK;
+}
+#define AXXX_CP_CSQ_AVAIL_IB1__MASK				0x00007f00
+#define AXXX_CP_CSQ_AVAIL_IB1__SHIFT				8
+static inline uint32_t AXXX_CP_CSQ_AVAIL_IB1(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_AVAIL_IB1__SHIFT) & AXXX_CP_CSQ_AVAIL_IB1__MASK;
+}
+#define AXXX_CP_CSQ_AVAIL_IB2__MASK				0x007f0000
+#define AXXX_CP_CSQ_AVAIL_IB2__SHIFT				16
+static inline uint32_t AXXX_CP_CSQ_AVAIL_IB2(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_AVAIL_IB2__SHIFT) & AXXX_CP_CSQ_AVAIL_IB2__MASK;
+}
+
+#define REG_AXXX_CP_STQ_AVAIL					0x000001d8
+#define AXXX_CP_STQ_AVAIL_ST__MASK				0x0000007f
+#define AXXX_CP_STQ_AVAIL_ST__SHIFT				0
+static inline uint32_t AXXX_CP_STQ_AVAIL_ST(uint32_t val)
+{
+	return ((val) << AXXX_CP_STQ_AVAIL_ST__SHIFT) & AXXX_CP_STQ_AVAIL_ST__MASK;
+}
+
+#define REG_AXXX_CP_MEQ_AVAIL					0x000001d9
+#define AXXX_CP_MEQ_AVAIL_MEQ__MASK				0x0000001f
+#define AXXX_CP_MEQ_AVAIL_MEQ__SHIFT				0
+static inline uint32_t AXXX_CP_MEQ_AVAIL_MEQ(uint32_t val)
+{
+	return ((val) << AXXX_CP_MEQ_AVAIL_MEQ__SHIFT) & AXXX_CP_MEQ_AVAIL_MEQ__MASK;
+}
+
+#define REG_AXXX_SCRATCH_UMSK					0x000001dc
+#define AXXX_SCRATCH_UMSK_UMSK__MASK				0x000000ff
+#define AXXX_SCRATCH_UMSK_UMSK__SHIFT				0
+static inline uint32_t AXXX_SCRATCH_UMSK_UMSK(uint32_t val)
+{
+	return ((val) << AXXX_SCRATCH_UMSK_UMSK__SHIFT) & AXXX_SCRATCH_UMSK_UMSK__MASK;
+}
+#define AXXX_SCRATCH_UMSK_SWAP__MASK				0x00030000
+#define AXXX_SCRATCH_UMSK_SWAP__SHIFT				16
+static inline uint32_t AXXX_SCRATCH_UMSK_SWAP(uint32_t val)
+{
+	return ((val) << AXXX_SCRATCH_UMSK_SWAP__SHIFT) & AXXX_SCRATCH_UMSK_SWAP__MASK;
+}
+
+#define REG_AXXX_SCRATCH_ADDR					0x000001dd
+
+#define REG_AXXX_CP_ME_RDADDR					0x000001ea
+
+#define REG_AXXX_CP_STATE_DEBUG_INDEX				0x000001ec
+
+#define REG_AXXX_CP_STATE_DEBUG_DATA				0x000001ed
+
+#define REG_AXXX_CP_INT_CNTL					0x000001f2
+
+#define REG_AXXX_CP_INT_STATUS					0x000001f3
+
+#define REG_AXXX_CP_INT_ACK					0x000001f4
+
+#define REG_AXXX_CP_ME_CNTL					0x000001f6
+
+#define REG_AXXX_CP_ME_STATUS					0x000001f7
+
+#define REG_AXXX_CP_ME_RAM_WADDR				0x000001f8
+
+#define REG_AXXX_CP_ME_RAM_RADDR				0x000001f9
+
+#define REG_AXXX_CP_ME_RAM_DATA					0x000001fa
+
+#define REG_AXXX_CP_DEBUG					0x000001fc
+#define AXXX_CP_DEBUG_PREDICATE_DISABLE				0x00800000
+#define AXXX_CP_DEBUG_PROG_END_PTR_ENABLE			0x01000000
+#define AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE			0x02000000
+#define AXXX_CP_DEBUG_PREFETCH_PASS_NOPS			0x04000000
+#define AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE			0x08000000
+#define AXXX_CP_DEBUG_PREFETCH_MATCH_DISABLE			0x10000000
+#define AXXX_CP_DEBUG_SIMPLE_ME_FLOW_CONTROL			0x40000000
+#define AXXX_CP_DEBUG_MIU_WRITE_PACK_DISABLE			0x80000000
+
+#define REG_AXXX_CP_CSQ_RB_STAT					0x000001fd
+#define AXXX_CP_CSQ_RB_STAT_RPTR__MASK				0x0000007f
+#define AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT				0
+static inline uint32_t AXXX_CP_CSQ_RB_STAT_RPTR(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_RB_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_RPTR__MASK;
+}
+#define AXXX_CP_CSQ_RB_STAT_WPTR__MASK				0x007f0000
+#define AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT				16
+static inline uint32_t AXXX_CP_CSQ_RB_STAT_WPTR(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_RB_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_RB_STAT_WPTR__MASK;
+}
+
+#define REG_AXXX_CP_CSQ_IB1_STAT				0x000001fe
+#define AXXX_CP_CSQ_IB1_STAT_RPTR__MASK				0x0000007f
+#define AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT			0
+static inline uint32_t AXXX_CP_CSQ_IB1_STAT_RPTR(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_IB1_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_RPTR__MASK;
+}
+#define AXXX_CP_CSQ_IB1_STAT_WPTR__MASK				0x007f0000
+#define AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT			16
+static inline uint32_t AXXX_CP_CSQ_IB1_STAT_WPTR(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_IB1_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB1_STAT_WPTR__MASK;
+}
+
+#define REG_AXXX_CP_CSQ_IB2_STAT				0x000001ff
+#define AXXX_CP_CSQ_IB2_STAT_RPTR__MASK				0x0000007f
+#define AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT			0
+static inline uint32_t AXXX_CP_CSQ_IB2_STAT_RPTR(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_IB2_STAT_RPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_RPTR__MASK;
+}
+#define AXXX_CP_CSQ_IB2_STAT_WPTR__MASK				0x007f0000
+#define AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT			16
+static inline uint32_t AXXX_CP_CSQ_IB2_STAT_WPTR(uint32_t val)
+{
+	return ((val) << AXXX_CP_CSQ_IB2_STAT_WPTR__SHIFT) & AXXX_CP_CSQ_IB2_STAT_WPTR__MASK;
+}
+
+#define REG_AXXX_CP_SCRATCH_REG0				0x00000578
+
+#define REG_AXXX_CP_SCRATCH_REG1				0x00000579
+
+#define REG_AXXX_CP_SCRATCH_REG2				0x0000057a
+
+#define REG_AXXX_CP_SCRATCH_REG3				0x0000057b
+
+#define REG_AXXX_CP_SCRATCH_REG4				0x0000057c
+
+#define REG_AXXX_CP_SCRATCH_REG5				0x0000057d
+
+#define REG_AXXX_CP_SCRATCH_REG6				0x0000057e
+
+#define REG_AXXX_CP_SCRATCH_REG7				0x0000057f
+
+#define REG_AXXX_CP_ME_CF_EVENT_SRC				0x0000060a
+
+#define REG_AXXX_CP_ME_CF_EVENT_ADDR				0x0000060b
+
+#define REG_AXXX_CP_ME_CF_EVENT_DATA				0x0000060c
+
+#define REG_AXXX_CP_ME_NRT_ADDR					0x0000060d
+
+#define REG_AXXX_CP_ME_NRT_DATA					0x0000060e
+
+
+#endif /* ADRENO_COMMON_XML */
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
new file mode 100644
index 0000000..a605847
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "adreno_gpu.h"
+#include "msm_gem.h"
+
+struct adreno_info {
+	struct adreno_rev rev;
+	uint32_t revn;
+	const char *name;
+	const char *pm4fw, *pfpfw;
+	uint32_t gmem;
+};
+
+#define ANY_ID 0xff
+
+static const struct adreno_info gpulist[] = {
+	{
+		.rev   = ADRENO_REV(3, 0, 5, ANY_ID),
+		.revn  = 305,
+		.name  = "A305",
+		.pm4fw = "a300_pm4.fw",
+		.pfpfw = "a300_pfp.fw",
+		.gmem  = SZ_256K,
+	}, {
+		.rev   = ADRENO_REV(3, 2, ANY_ID, ANY_ID),
+		.revn  = 320,
+		.name  = "A320",
+		.pm4fw = "a300_pm4.fw",
+		.pfpfw = "a300_pfp.fw",
+		.gmem  = SZ_512K,
+	}, {
+		.rev   = ADRENO_REV(3, 3, 0, 0),
+		.revn  = 330,
+		.name  = "A330",
+		.pm4fw = "a330_pm4.fw",
+		.pfpfw = "a330_pfp.fw",
+		.gmem  = SZ_1M,
+	},
+};
+
+#define RB_SIZE    SZ_32K
+#define RB_BLKSIZE 16
+
+int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+
+	switch (param) {
+	case MSM_PARAM_GPU_ID:
+		*value = adreno_gpu->info->revn;
+		return 0;
+	case MSM_PARAM_GMEM_SIZE:
+		*value = adreno_gpu->info->gmem;
+		return 0;
+	default:
+		DBG("%s: invalid param: %u", gpu->name, param);
+		return -EINVAL;
+	}
+}
+
+#define rbmemptr(adreno_gpu, member)  \
+	((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member))
+
+int adreno_hw_init(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+
+	DBG("%s", gpu->name);
+
+	/* Setup REG_CP_RB_CNTL: */
+	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
+			/* size is log2(quad-words): */
+			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
+			AXXX_CP_RB_CNTL_BLKSZ(RB_BLKSIZE));
+
+	/* Setup ringbuffer address: */
+	gpu_write(gpu, REG_AXXX_CP_RB_BASE, gpu->rb_iova);
+	gpu_write(gpu, REG_AXXX_CP_RB_RPTR_ADDR, rbmemptr(adreno_gpu, rptr));
+
+	/* Setup scratch/timestamp: */
+	gpu_write(gpu, REG_AXXX_SCRATCH_ADDR, rbmemptr(adreno_gpu, fence));
+
+	gpu_write(gpu, REG_AXXX_SCRATCH_UMSK, 0x1);
+
+	return 0;
+}
+
+static uint32_t get_wptr(struct msm_ringbuffer *ring)
+{
+	return ring->cur - ring->start;
+}
+
+uint32_t adreno_last_fence(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	return adreno_gpu->memptrs->fence;
+}
+
+void adreno_recover(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct drm_device *dev = gpu->dev;
+	int ret;
+
+	gpu->funcs->pm_suspend(gpu);
+
+	/* reset ringbuffer: */
+	gpu->rb->cur = gpu->rb->start;
+
+	/* reset completed fence seqno, just discard anything pending: */
+	adreno_gpu->memptrs->fence = gpu->submitted_fence;
+
+	gpu->funcs->pm_resume(gpu);
+	ret = gpu->funcs->hw_init(gpu);
+	if (ret) {
+		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
+		/* hmm, oh well? */
+	}
+}
+
+int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
+		struct msm_file_private *ctx)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct msm_drm_private *priv = gpu->dev->dev_private;
+	struct msm_ringbuffer *ring = gpu->rb;
+	unsigned i, ibs = 0;
+
+	for (i = 0; i < submit->nr_cmds; i++) {
+		switch (submit->cmd[i].type) {
+		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
+			/* ignore IB-targets */
+			break;
+		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
+			/* ignore if there has not been a ctx switch: */
+			if (priv->lastctx == ctx)
+				break;
+		case MSM_SUBMIT_CMD_BUF:
+			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
+			OUT_RING(ring, submit->cmd[i].iova);
+			OUT_RING(ring, submit->cmd[i].size);
+			ibs++;
+			break;
+		}
+	}
+
+	/* on a320, at least, we seem to need to pad things out to an
+	 * even number of qwords to avoid issue w/ CP hanging on wrap-
+	 * around:
+	 */
+	if (ibs % 2)
+		OUT_PKT2(ring);
+
+	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
+	OUT_RING(ring, submit->fence);
+
+	if (adreno_is_a3xx(adreno_gpu)) {
+		/* Flush HLSQ lazy updates to make sure there is nothing
+		 * pending for indirect loads after the timestamp has
+		 * passed:
+		 */
+		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+		OUT_RING(ring, HLSQ_FLUSH);
+
+		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+		OUT_RING(ring, 0x00000000);
+	}
+
+	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
+	OUT_RING(ring, CACHE_FLUSH_TS);
+	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
+	OUT_RING(ring, submit->fence);
+
+	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
+	OUT_PKT3(ring, CP_INTERRUPT, 1);
+	OUT_RING(ring, 0x80000000);
+
+#if 0
+	if (adreno_is_a3xx(adreno_gpu)) {
+		/* Dummy set-constant to trigger context rollover */
+		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
+		OUT_RING(ring, 0x00000000);
+	}
+#endif
+
+	gpu->funcs->flush(gpu);
+
+	return 0;
+}
+
+void adreno_flush(struct msm_gpu *gpu)
+{
+	uint32_t wptr = get_wptr(gpu->rb);
+
+	/* ensure writes to ringbuffer have hit system memory: */
+	mb();
+
+	gpu_write(gpu, REG_AXXX_CP_RB_WPTR, wptr);
+}
+
+void adreno_idle(struct msm_gpu *gpu)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	uint32_t rptr, wptr = get_wptr(gpu->rb);
+	unsigned long t;
+
+	t = jiffies + ADRENO_IDLE_TIMEOUT;
+
+	/* then wait for CP to drain ringbuffer: */
+	do {
+		rptr = adreno_gpu->memptrs->rptr;
+		if (rptr == wptr)
+			return;
+	} while(time_before(jiffies, t));
+
+	DRM_ERROR("timeout waiting for %s to drain ringbuffer!\n", gpu->name);
+
+	/* TODO maybe we need to reset GPU here to recover from hang? */
+}
+
+#ifdef CONFIG_DEBUG_FS
+void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+
+	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
+			adreno_gpu->info->revn, adreno_gpu->rev.core,
+			adreno_gpu->rev.major, adreno_gpu->rev.minor,
+			adreno_gpu->rev.patchid);
+
+	seq_printf(m, "fence:    %d/%d\n", adreno_gpu->memptrs->fence,
+			gpu->submitted_fence);
+	seq_printf(m, "rptr:     %d\n", adreno_gpu->memptrs->rptr);
+	seq_printf(m, "wptr:     %d\n", adreno_gpu->memptrs->wptr);
+	seq_printf(m, "rb wptr:  %d\n", get_wptr(gpu->rb));
+}
+#endif
+
+void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	uint32_t freedwords;
+	do {
+		uint32_t size = gpu->rb->size / 4;
+		uint32_t wptr = get_wptr(gpu->rb);
+		uint32_t rptr = adreno_gpu->memptrs->rptr;
+		freedwords = (rptr + (size - 1) - wptr) % size;
+	} while(freedwords < ndwords);
+}
+
+static const char *iommu_ports[] = {
+		"gfx3d_user", "gfx3d_priv",
+		"gfx3d1_user", "gfx3d1_priv",
+};
+
+static inline bool _rev_match(uint8_t entry, uint8_t id)
+{
+	return (entry == ANY_ID) || (entry == id);
+}
+
+int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
+		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
+		struct adreno_rev rev)
+{
+	int i, ret;
+
+	/* identify gpu: */
+	for (i = 0; i < ARRAY_SIZE(gpulist); i++) {
+		const struct adreno_info *info = &gpulist[i];
+		if (_rev_match(info->rev.core, rev.core) &&
+				_rev_match(info->rev.major, rev.major) &&
+				_rev_match(info->rev.minor, rev.minor) &&
+				_rev_match(info->rev.patchid, rev.patchid)) {
+			gpu->info = info;
+			gpu->revn = info->revn;
+			break;
+		}
+	}
+
+	if (i == ARRAY_SIZE(gpulist)) {
+		dev_err(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n",
+				rev.core, rev.major, rev.minor, rev.patchid);
+		return -ENXIO;
+	}
+
+	DBG("Found GPU: %s (%u.%u.%u.%u)", gpu->info->name,
+			rev.core, rev.major, rev.minor, rev.patchid);
+
+	gpu->funcs = funcs;
+	gpu->rev = rev;
+
+	ret = request_firmware(&gpu->pm4, gpu->info->pm4fw, drm->dev);
+	if (ret) {
+		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
+				gpu->info->pm4fw, ret);
+		return ret;
+	}
+
+	ret = request_firmware(&gpu->pfp, gpu->info->pfpfw, drm->dev);
+	if (ret) {
+		dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
+				gpu->info->pfpfw, ret);
+		return ret;
+	}
+
+	ret = msm_gpu_init(drm, pdev, &gpu->base, &funcs->base,
+			gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
+			RB_SIZE);
+	if (ret)
+		return ret;
+
+	ret = msm_iommu_attach(drm, gpu->base.iommu,
+			iommu_ports, ARRAY_SIZE(iommu_ports));
+	if (ret)
+		return ret;
+
+	gpu->memptrs_bo = msm_gem_new(drm, sizeof(*gpu->memptrs),
+			MSM_BO_UNCACHED);
+	if (IS_ERR(gpu->memptrs_bo)) {
+		ret = PTR_ERR(gpu->memptrs_bo);
+		gpu->memptrs_bo = NULL;
+		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
+		return ret;
+	}
+
+	gpu->memptrs = msm_gem_vaddr_locked(gpu->memptrs_bo);
+	if (!gpu->memptrs) {
+		dev_err(drm->dev, "could not vmap memptrs\n");
+		return -ENOMEM;
+	}
+
+	ret = msm_gem_get_iova_locked(gpu->memptrs_bo, gpu->base.id,
+			&gpu->memptrs_iova);
+	if (ret) {
+		dev_err(drm->dev, "could not map memptrs: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+void adreno_gpu_cleanup(struct adreno_gpu *gpu)
+{
+	if (gpu->memptrs_bo) {
+		if (gpu->memptrs_iova)
+			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
+		drm_gem_object_unreference(gpu->memptrs_bo);
+	}
+	if (gpu->pm4)
+		release_firmware(gpu->pm4);
+	if (gpu->pfp)
+		release_firmware(gpu->pfp);
+	msm_gpu_cleanup(&gpu->base);
+}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
new file mode 100644
index 0000000..f73abfb
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ADRENO_GPU_H__
+#define __ADRENO_GPU_H__
+
+#include <linux/firmware.h>
+
+#include "msm_gpu.h"
+
+#include "adreno_common.xml.h"
+#include "adreno_pm4.xml.h"
+
+struct adreno_rev {
+	uint8_t  core;
+	uint8_t  major;
+	uint8_t  minor;
+	uint8_t  patchid;
+};
+
+#define ADRENO_REV(core, major, minor, patchid) \
+	((struct adreno_rev){ core, major, minor, patchid })
+
+struct adreno_gpu_funcs {
+	struct msm_gpu_funcs base;
+};
+
+struct adreno_info;
+
+struct adreno_rbmemptrs {
+	volatile uint32_t rptr;
+	volatile uint32_t wptr;
+	volatile uint32_t fence;
+};
+
+struct adreno_gpu {
+	struct msm_gpu base;
+	struct adreno_rev rev;
+	const struct adreno_info *info;
+	uint32_t revn;  /* numeric revision name */
+	const struct adreno_gpu_funcs *funcs;
+
+	/* firmware: */
+	const struct firmware *pm4, *pfp;
+
+	/* ringbuffer rptr/wptr: */
+	// TODO should this be in msm_ringbuffer?  I think it would be
+	// different for z180..
+	struct adreno_rbmemptrs *memptrs;
+	struct drm_gem_object *memptrs_bo;
+	uint32_t memptrs_iova;
+};
+#define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base)
+
+/* platform config data (ie. from DT, or pdata) */
+struct adreno_platform_config {
+	struct adreno_rev rev;
+	uint32_t fast_rate, slow_rate, bus_freq;
+};
+
+#define ADRENO_IDLE_TIMEOUT (20 * 1000)
+
+static inline bool adreno_is_a3xx(struct adreno_gpu *gpu)
+{
+	return (gpu->revn >= 300) && (gpu->revn < 400);
+}
+
+static inline bool adreno_is_a305(struct adreno_gpu *gpu)
+{
+	return gpu->revn == 305;
+}
+
+static inline bool adreno_is_a320(struct adreno_gpu *gpu)
+{
+	return gpu->revn == 320;
+}
+
+static inline bool adreno_is_a330(struct adreno_gpu *gpu)
+{
+	return gpu->revn == 330;
+}
+
+int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
+int adreno_hw_init(struct msm_gpu *gpu);
+uint32_t adreno_last_fence(struct msm_gpu *gpu);
+void adreno_recover(struct msm_gpu *gpu);
+int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
+		struct msm_file_private *ctx);
+void adreno_flush(struct msm_gpu *gpu);
+void adreno_idle(struct msm_gpu *gpu);
+#ifdef CONFIG_DEBUG_FS
+void adreno_show(struct msm_gpu *gpu, struct seq_file *m);
+#endif
+void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords);
+
+int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
+		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
+		struct adreno_rev rev);
+void adreno_gpu_cleanup(struct adreno_gpu *gpu);
+
+
+/* ringbuffer helpers (the parts that are adreno specific) */
+
+static inline void
+OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
+{
+	adreno_wait_ring(ring->gpu, cnt+1);
+	OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF));
+}
+
+/* no-op packet: */
+static inline void
+OUT_PKT2(struct msm_ringbuffer *ring)
+{
+	adreno_wait_ring(ring->gpu, 1);
+	OUT_RING(ring, CP_TYPE2_PKT);
+}
+
+static inline void
+OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
+{
+	adreno_wait_ring(ring->gpu, cnt+1);
+	OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8));
+}
+
+
+#endif /* __ADRENO_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
new file mode 100644
index 0000000..94c13f4
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
@@ -0,0 +1,254 @@
+#ifndef ADRENO_PM4_XML
+#define ADRENO_PM4_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml              (    327 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/a2xx/a2xx.xml           (  30005 bytes, from 2013-07-19 21:30:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   8983 bytes, from 2013-07-24 01:38:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx/a3xx.xml           (  51415 bytes, from 2013-08-03 14:26:05)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum vgt_event_type {
+	VS_DEALLOC = 0,
+	PS_DEALLOC = 1,
+	VS_DONE_TS = 2,
+	PS_DONE_TS = 3,
+	CACHE_FLUSH_TS = 4,
+	CONTEXT_DONE = 5,
+	CACHE_FLUSH = 6,
+	HLSQ_FLUSH = 7,
+	VIZQUERY_START = 7,
+	VIZQUERY_END = 8,
+	SC_WAIT_WC = 9,
+	RST_PIX_CNT = 13,
+	RST_VTX_CNT = 14,
+	TILE_FLUSH = 15,
+	CACHE_FLUSH_AND_INV_TS_EVENT = 20,
+	ZPASS_DONE = 21,
+	CACHE_FLUSH_AND_INV_EVENT = 22,
+	PERFCOUNTER_START = 23,
+	PERFCOUNTER_STOP = 24,
+	VS_FETCH_DONE = 27,
+	FACENESS_FLUSH = 28,
+};
+
+enum pc_di_primtype {
+	DI_PT_NONE = 0,
+	DI_PT_POINTLIST = 1,
+	DI_PT_LINELIST = 2,
+	DI_PT_LINESTRIP = 3,
+	DI_PT_TRILIST = 4,
+	DI_PT_TRIFAN = 5,
+	DI_PT_TRISTRIP = 6,
+	DI_PT_RECTLIST = 8,
+	DI_PT_QUADLIST = 13,
+	DI_PT_QUADSTRIP = 14,
+	DI_PT_POLYGON = 15,
+	DI_PT_2D_COPY_RECT_LIST_V0 = 16,
+	DI_PT_2D_COPY_RECT_LIST_V1 = 17,
+	DI_PT_2D_COPY_RECT_LIST_V2 = 18,
+	DI_PT_2D_COPY_RECT_LIST_V3 = 19,
+	DI_PT_2D_FILL_RECT_LIST = 20,
+	DI_PT_2D_LINE_STRIP = 21,
+	DI_PT_2D_TRI_STRIP = 22,
+};
+
+enum pc_di_src_sel {
+	DI_SRC_SEL_DMA = 0,
+	DI_SRC_SEL_IMMEDIATE = 1,
+	DI_SRC_SEL_AUTO_INDEX = 2,
+	DI_SRC_SEL_RESERVED = 3,
+};
+
+enum pc_di_index_size {
+	INDEX_SIZE_IGN = 0,
+	INDEX_SIZE_16_BIT = 0,
+	INDEX_SIZE_32_BIT = 1,
+	INDEX_SIZE_8_BIT = 2,
+	INDEX_SIZE_INVALID = 0,
+};
+
+enum pc_di_vis_cull_mode {
+	IGNORE_VISIBILITY = 0,
+};
+
+enum adreno_pm4_packet_type {
+	CP_TYPE0_PKT = 0,
+	CP_TYPE1_PKT = 0x40000000,
+	CP_TYPE2_PKT = 0x80000000,
+	CP_TYPE3_PKT = 0xc0000000,
+};
+
+enum adreno_pm4_type3_packets {
+	CP_ME_INIT = 72,
+	CP_NOP = 16,
+	CP_INDIRECT_BUFFER = 63,
+	CP_INDIRECT_BUFFER_PFD = 55,
+	CP_WAIT_FOR_IDLE = 38,
+	CP_WAIT_REG_MEM = 60,
+	CP_WAIT_REG_EQ = 82,
+	CP_WAT_REG_GTE = 83,
+	CP_WAIT_UNTIL_READ = 92,
+	CP_WAIT_IB_PFD_COMPLETE = 93,
+	CP_REG_RMW = 33,
+	CP_SET_BIN_DATA = 47,
+	CP_REG_TO_MEM = 62,
+	CP_MEM_WRITE = 61,
+	CP_MEM_WRITE_CNTR = 79,
+	CP_COND_EXEC = 68,
+	CP_COND_WRITE = 69,
+	CP_EVENT_WRITE = 70,
+	CP_EVENT_WRITE_SHD = 88,
+	CP_EVENT_WRITE_CFL = 89,
+	CP_EVENT_WRITE_ZPD = 91,
+	CP_RUN_OPENCL = 49,
+	CP_DRAW_INDX = 34,
+	CP_DRAW_INDX_2 = 54,
+	CP_DRAW_INDX_BIN = 52,
+	CP_DRAW_INDX_2_BIN = 53,
+	CP_VIZ_QUERY = 35,
+	CP_SET_STATE = 37,
+	CP_SET_CONSTANT = 45,
+	CP_IM_LOAD = 39,
+	CP_IM_LOAD_IMMEDIATE = 43,
+	CP_LOAD_CONSTANT_CONTEXT = 46,
+	CP_INVALIDATE_STATE = 59,
+	CP_SET_SHADER_BASES = 74,
+	CP_SET_BIN_MASK = 80,
+	CP_SET_BIN_SELECT = 81,
+	CP_CONTEXT_UPDATE = 94,
+	CP_INTERRUPT = 64,
+	CP_IM_STORE = 44,
+	CP_SET_BIN_BASE_OFFSET = 75,
+	CP_SET_DRAW_INIT_FLAGS = 75,
+	CP_SET_PROTECTED_MODE = 95,
+	CP_LOAD_STATE = 48,
+	CP_COND_INDIRECT_BUFFER_PFE = 58,
+	CP_COND_INDIRECT_BUFFER_PFD = 50,
+	CP_INDIRECT_BUFFER_PFE = 63,
+	CP_SET_BIN = 76,
+};
+
+enum adreno_state_block {
+	SB_VERT_TEX = 0,
+	SB_VERT_MIPADDR = 1,
+	SB_FRAG_TEX = 2,
+	SB_FRAG_MIPADDR = 3,
+	SB_VERT_SHADER = 4,
+	SB_FRAG_SHADER = 6,
+};
+
+enum adreno_state_type {
+	ST_SHADER = 0,
+	ST_CONSTANTS = 1,
+};
+
+enum adreno_state_src {
+	SS_DIRECT = 0,
+	SS_INDIRECT = 4,
+};
+
+#define REG_CP_LOAD_STATE_0					0x00000000
+#define CP_LOAD_STATE_0_DST_OFF__MASK				0x0000ffff
+#define CP_LOAD_STATE_0_DST_OFF__SHIFT				0
+static inline uint32_t CP_LOAD_STATE_0_DST_OFF(uint32_t val)
+{
+	return ((val) << CP_LOAD_STATE_0_DST_OFF__SHIFT) & CP_LOAD_STATE_0_DST_OFF__MASK;
+}
+#define CP_LOAD_STATE_0_STATE_SRC__MASK				0x00070000
+#define CP_LOAD_STATE_0_STATE_SRC__SHIFT			16
+static inline uint32_t CP_LOAD_STATE_0_STATE_SRC(enum adreno_state_src val)
+{
+	return ((val) << CP_LOAD_STATE_0_STATE_SRC__SHIFT) & CP_LOAD_STATE_0_STATE_SRC__MASK;
+}
+#define CP_LOAD_STATE_0_STATE_BLOCK__MASK			0x00380000
+#define CP_LOAD_STATE_0_STATE_BLOCK__SHIFT			19
+static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val)
+{
+	return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE_0_STATE_BLOCK__MASK;
+}
+#define CP_LOAD_STATE_0_NUM_UNIT__MASK				0x7fc00000
+#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT				22
+static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val)
+{
+	return ((val) << CP_LOAD_STATE_0_NUM_UNIT__SHIFT) & CP_LOAD_STATE_0_NUM_UNIT__MASK;
+}
+
+#define REG_CP_LOAD_STATE_1					0x00000001
+#define CP_LOAD_STATE_1_STATE_TYPE__MASK			0x00000003
+#define CP_LOAD_STATE_1_STATE_TYPE__SHIFT			0
+static inline uint32_t CP_LOAD_STATE_1_STATE_TYPE(enum adreno_state_type val)
+{
+	return ((val) << CP_LOAD_STATE_1_STATE_TYPE__SHIFT) & CP_LOAD_STATE_1_STATE_TYPE__MASK;
+}
+#define CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK			0xfffffffc
+#define CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT			2
+static inline uint32_t CP_LOAD_STATE_1_EXT_SRC_ADDR(uint32_t val)
+{
+	return ((val >> 2) << CP_LOAD_STATE_1_EXT_SRC_ADDR__SHIFT) & CP_LOAD_STATE_1_EXT_SRC_ADDR__MASK;
+}
+
+#define REG_CP_SET_BIN_0					0x00000000
+
+#define REG_CP_SET_BIN_1					0x00000001
+#define CP_SET_BIN_1_X1__MASK					0x0000ffff
+#define CP_SET_BIN_1_X1__SHIFT					0
+static inline uint32_t CP_SET_BIN_1_X1(uint32_t val)
+{
+	return ((val) << CP_SET_BIN_1_X1__SHIFT) & CP_SET_BIN_1_X1__MASK;
+}
+#define CP_SET_BIN_1_Y1__MASK					0xffff0000
+#define CP_SET_BIN_1_Y1__SHIFT					16
+static inline uint32_t CP_SET_BIN_1_Y1(uint32_t val)
+{
+	return ((val) << CP_SET_BIN_1_Y1__SHIFT) & CP_SET_BIN_1_Y1__MASK;
+}
+
+#define REG_CP_SET_BIN_2					0x00000002
+#define CP_SET_BIN_2_X2__MASK					0x0000ffff
+#define CP_SET_BIN_2_X2__SHIFT					0
+static inline uint32_t CP_SET_BIN_2_X2(uint32_t val)
+{
+	return ((val) << CP_SET_BIN_2_X2__SHIFT) & CP_SET_BIN_2_X2__MASK;
+}
+#define CP_SET_BIN_2_Y2__MASK					0xffff0000
+#define CP_SET_BIN_2_Y2__SHIFT					16
+static inline uint32_t CP_SET_BIN_2_Y2(uint32_t val)
+{
+	return ((val) << CP_SET_BIN_2_Y2__SHIFT) & CP_SET_BIN_2_Y2__MASK;
+}
+
+
+#endif /* ADRENO_PM4_XML */
diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h
new file mode 100644
index 0000000..6f8396b
--- /dev/null
+++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h
@@ -0,0 +1,502 @@
+#ifndef DSI_XML
+#define DSI_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    595 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp4/mdp4.xml           (  19332 bytes, from 2013-08-16 22:16:36)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  19288 bytes, from 2013-08-11 18:14:15)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum dsi_traffic_mode {
+	NON_BURST_SYNCH_PULSE = 0,
+	NON_BURST_SYNCH_EVENT = 1,
+	BURST_MODE = 2,
+};
+
+enum dsi_dst_format {
+	DST_FORMAT_RGB565 = 0,
+	DST_FORMAT_RGB666 = 1,
+	DST_FORMAT_RGB666_LOOSE = 2,
+	DST_FORMAT_RGB888 = 3,
+};
+
+enum dsi_rgb_swap {
+	SWAP_RGB = 0,
+	SWAP_RBG = 1,
+	SWAP_BGR = 2,
+	SWAP_BRG = 3,
+	SWAP_GRB = 4,
+	SWAP_GBR = 5,
+};
+
+enum dsi_cmd_trigger {
+	TRIGGER_NONE = 0,
+	TRIGGER_TE = 2,
+	TRIGGER_SW = 4,
+	TRIGGER_SW_SEOF = 5,
+	TRIGGER_SW_TE = 6,
+};
+
+#define DSI_IRQ_CMD_DMA_DONE					0x00000001
+#define DSI_IRQ_MASK_CMD_DMA_DONE				0x00000002
+#define DSI_IRQ_CMD_MDP_DONE					0x00000100
+#define DSI_IRQ_MASK_CMD_MDP_DONE				0x00000200
+#define DSI_IRQ_VIDEO_DONE					0x00010000
+#define DSI_IRQ_MASK_VIDEO_DONE					0x00020000
+#define DSI_IRQ_ERROR						0x01000000
+#define DSI_IRQ_MASK_ERROR					0x02000000
+#define REG_DSI_CTRL						0x00000000
+#define DSI_CTRL_ENABLE						0x00000001
+#define DSI_CTRL_VID_MODE_EN					0x00000002
+#define DSI_CTRL_CMD_MODE_EN					0x00000004
+#define DSI_CTRL_LANE0						0x00000010
+#define DSI_CTRL_LANE1						0x00000020
+#define DSI_CTRL_LANE2						0x00000040
+#define DSI_CTRL_LANE3						0x00000080
+#define DSI_CTRL_CLK_EN						0x00000100
+#define DSI_CTRL_ECC_CHECK					0x00100000
+#define DSI_CTRL_CRC_CHECK					0x01000000
+
+#define REG_DSI_STATUS0						0x00000004
+#define DSI_STATUS0_CMD_MODE_DMA_BUSY				0x00000002
+#define DSI_STATUS0_VIDEO_MODE_ENGINE_BUSY			0x00000008
+#define DSI_STATUS0_DSI_BUSY					0x00000010
+
+#define REG_DSI_FIFO_STATUS					0x00000008
+
+#define REG_DSI_VID_CFG0					0x0000000c
+#define DSI_VID_CFG0_VIRT_CHANNEL__MASK				0x00000003
+#define DSI_VID_CFG0_VIRT_CHANNEL__SHIFT			0
+static inline uint32_t DSI_VID_CFG0_VIRT_CHANNEL(uint32_t val)
+{
+	return ((val) << DSI_VID_CFG0_VIRT_CHANNEL__SHIFT) & DSI_VID_CFG0_VIRT_CHANNEL__MASK;
+}
+#define DSI_VID_CFG0_DST_FORMAT__MASK				0x00000030
+#define DSI_VID_CFG0_DST_FORMAT__SHIFT				4
+static inline uint32_t DSI_VID_CFG0_DST_FORMAT(enum dsi_dst_format val)
+{
+	return ((val) << DSI_VID_CFG0_DST_FORMAT__SHIFT) & DSI_VID_CFG0_DST_FORMAT__MASK;
+}
+#define DSI_VID_CFG0_TRAFFIC_MODE__MASK				0x00000300
+#define DSI_VID_CFG0_TRAFFIC_MODE__SHIFT			8
+static inline uint32_t DSI_VID_CFG0_TRAFFIC_MODE(enum dsi_traffic_mode val)
+{
+	return ((val) << DSI_VID_CFG0_TRAFFIC_MODE__SHIFT) & DSI_VID_CFG0_TRAFFIC_MODE__MASK;
+}
+#define DSI_VID_CFG0_BLLP_POWER_STOP				0x00001000
+#define DSI_VID_CFG0_EOF_BLLP_POWER_STOP			0x00008000
+#define DSI_VID_CFG0_HSA_POWER_STOP				0x00010000
+#define DSI_VID_CFG0_HBP_POWER_STOP				0x00100000
+#define DSI_VID_CFG0_HFP_POWER_STOP				0x01000000
+#define DSI_VID_CFG0_PULSE_MODE_HSA_HE				0x10000000
+
+#define REG_DSI_VID_CFG1					0x0000001c
+#define DSI_VID_CFG1_R_SEL					0x00000010
+#define DSI_VID_CFG1_G_SEL					0x00000100
+#define DSI_VID_CFG1_B_SEL					0x00001000
+#define DSI_VID_CFG1_RGB_SWAP__MASK				0x00070000
+#define DSI_VID_CFG1_RGB_SWAP__SHIFT				16
+static inline uint32_t DSI_VID_CFG1_RGB_SWAP(enum dsi_rgb_swap val)
+{
+	return ((val) << DSI_VID_CFG1_RGB_SWAP__SHIFT) & DSI_VID_CFG1_RGB_SWAP__MASK;
+}
+#define DSI_VID_CFG1_INTERLEAVE_MAX__MASK			0x00f00000
+#define DSI_VID_CFG1_INTERLEAVE_MAX__SHIFT			20
+static inline uint32_t DSI_VID_CFG1_INTERLEAVE_MAX(uint32_t val)
+{
+	return ((val) << DSI_VID_CFG1_INTERLEAVE_MAX__SHIFT) & DSI_VID_CFG1_INTERLEAVE_MAX__MASK;
+}
+
+#define REG_DSI_ACTIVE_H					0x00000020
+#define DSI_ACTIVE_H_START__MASK				0x00000fff
+#define DSI_ACTIVE_H_START__SHIFT				0
+static inline uint32_t DSI_ACTIVE_H_START(uint32_t val)
+{
+	return ((val) << DSI_ACTIVE_H_START__SHIFT) & DSI_ACTIVE_H_START__MASK;
+}
+#define DSI_ACTIVE_H_END__MASK					0x0fff0000
+#define DSI_ACTIVE_H_END__SHIFT					16
+static inline uint32_t DSI_ACTIVE_H_END(uint32_t val)
+{
+	return ((val) << DSI_ACTIVE_H_END__SHIFT) & DSI_ACTIVE_H_END__MASK;
+}
+
+#define REG_DSI_ACTIVE_V					0x00000024
+#define DSI_ACTIVE_V_START__MASK				0x00000fff
+#define DSI_ACTIVE_V_START__SHIFT				0
+static inline uint32_t DSI_ACTIVE_V_START(uint32_t val)
+{
+	return ((val) << DSI_ACTIVE_V_START__SHIFT) & DSI_ACTIVE_V_START__MASK;
+}
+#define DSI_ACTIVE_V_END__MASK					0x0fff0000
+#define DSI_ACTIVE_V_END__SHIFT					16
+static inline uint32_t DSI_ACTIVE_V_END(uint32_t val)
+{
+	return ((val) << DSI_ACTIVE_V_END__SHIFT) & DSI_ACTIVE_V_END__MASK;
+}
+
+#define REG_DSI_TOTAL						0x00000028
+#define DSI_TOTAL_H_TOTAL__MASK					0x00000fff
+#define DSI_TOTAL_H_TOTAL__SHIFT				0
+static inline uint32_t DSI_TOTAL_H_TOTAL(uint32_t val)
+{
+	return ((val) << DSI_TOTAL_H_TOTAL__SHIFT) & DSI_TOTAL_H_TOTAL__MASK;
+}
+#define DSI_TOTAL_V_TOTAL__MASK					0x0fff0000
+#define DSI_TOTAL_V_TOTAL__SHIFT				16
+static inline uint32_t DSI_TOTAL_V_TOTAL(uint32_t val)
+{
+	return ((val) << DSI_TOTAL_V_TOTAL__SHIFT) & DSI_TOTAL_V_TOTAL__MASK;
+}
+
+#define REG_DSI_ACTIVE_HSYNC					0x0000002c
+#define DSI_ACTIVE_HSYNC_START__MASK				0x00000fff
+#define DSI_ACTIVE_HSYNC_START__SHIFT				0
+static inline uint32_t DSI_ACTIVE_HSYNC_START(uint32_t val)
+{
+	return ((val) << DSI_ACTIVE_HSYNC_START__SHIFT) & DSI_ACTIVE_HSYNC_START__MASK;
+}
+#define DSI_ACTIVE_HSYNC_END__MASK				0x0fff0000
+#define DSI_ACTIVE_HSYNC_END__SHIFT				16
+static inline uint32_t DSI_ACTIVE_HSYNC_END(uint32_t val)
+{
+	return ((val) << DSI_ACTIVE_HSYNC_END__SHIFT) & DSI_ACTIVE_HSYNC_END__MASK;
+}
+
+#define REG_DSI_ACTIVE_VSYNC					0x00000034
+#define DSI_ACTIVE_VSYNC_START__MASK				0x00000fff
+#define DSI_ACTIVE_VSYNC_START__SHIFT				0
+static inline uint32_t DSI_ACTIVE_VSYNC_START(uint32_t val)
+{
+	return ((val) << DSI_ACTIVE_VSYNC_START__SHIFT) & DSI_ACTIVE_VSYNC_START__MASK;
+}
+#define DSI_ACTIVE_VSYNC_END__MASK				0x0fff0000
+#define DSI_ACTIVE_VSYNC_END__SHIFT				16
+static inline uint32_t DSI_ACTIVE_VSYNC_END(uint32_t val)
+{
+	return ((val) << DSI_ACTIVE_VSYNC_END__SHIFT) & DSI_ACTIVE_VSYNC_END__MASK;
+}
+
+#define REG_DSI_CMD_DMA_CTRL					0x00000038
+#define DSI_CMD_DMA_CTRL_FROM_FRAME_BUFFER			0x10000000
+#define DSI_CMD_DMA_CTRL_LOW_POWER				0x04000000
+
+#define REG_DSI_CMD_CFG0					0x0000003c
+
+#define REG_DSI_CMD_CFG1					0x00000040
+
+#define REG_DSI_DMA_BASE					0x00000044
+
+#define REG_DSI_DMA_LEN						0x00000048
+
+#define REG_DSI_ACK_ERR_STATUS					0x00000064
+
+static inline uint32_t REG_DSI_RDBK(uint32_t i0) { return 0x00000068 + 0x4*i0; }
+
+static inline uint32_t REG_DSI_RDBK_DATA(uint32_t i0) { return 0x00000068 + 0x4*i0; }
+
+#define REG_DSI_TRIG_CTRL					0x00000080
+#define DSI_TRIG_CTRL_DMA_TRIGGER__MASK				0x0000000f
+#define DSI_TRIG_CTRL_DMA_TRIGGER__SHIFT			0
+static inline uint32_t DSI_TRIG_CTRL_DMA_TRIGGER(enum dsi_cmd_trigger val)
+{
+	return ((val) << DSI_TRIG_CTRL_DMA_TRIGGER__SHIFT) & DSI_TRIG_CTRL_DMA_TRIGGER__MASK;
+}
+#define DSI_TRIG_CTRL_MDP_TRIGGER__MASK				0x000000f0
+#define DSI_TRIG_CTRL_MDP_TRIGGER__SHIFT			4
+static inline uint32_t DSI_TRIG_CTRL_MDP_TRIGGER(enum dsi_cmd_trigger val)
+{
+	return ((val) << DSI_TRIG_CTRL_MDP_TRIGGER__SHIFT) & DSI_TRIG_CTRL_MDP_TRIGGER__MASK;
+}
+#define DSI_TRIG_CTRL_STREAM					0x00000100
+#define DSI_TRIG_CTRL_TE					0x80000000
+
+#define REG_DSI_TRIG_DMA					0x0000008c
+
+#define REG_DSI_DLN0_PHY_ERR					0x000000b0
+
+#define REG_DSI_TIMEOUT_STATUS					0x000000bc
+
+#define REG_DSI_CLKOUT_TIMING_CTRL				0x000000c0
+#define DSI_CLKOUT_TIMING_CTRL_T_CLK_PRE__MASK			0x0000003f
+#define DSI_CLKOUT_TIMING_CTRL_T_CLK_PRE__SHIFT			0
+static inline uint32_t DSI_CLKOUT_TIMING_CTRL_T_CLK_PRE(uint32_t val)
+{
+	return ((val) << DSI_CLKOUT_TIMING_CTRL_T_CLK_PRE__SHIFT) & DSI_CLKOUT_TIMING_CTRL_T_CLK_PRE__MASK;
+}
+#define DSI_CLKOUT_TIMING_CTRL_T_CLK_POST__MASK			0x00003f00
+#define DSI_CLKOUT_TIMING_CTRL_T_CLK_POST__SHIFT		8
+static inline uint32_t DSI_CLKOUT_TIMING_CTRL_T_CLK_POST(uint32_t val)
+{
+	return ((val) << DSI_CLKOUT_TIMING_CTRL_T_CLK_POST__SHIFT) & DSI_CLKOUT_TIMING_CTRL_T_CLK_POST__MASK;
+}
+
+#define REG_DSI_EOT_PACKET_CTRL					0x000000c8
+#define DSI_EOT_PACKET_CTRL_TX_EOT_APPEND			0x00000001
+#define DSI_EOT_PACKET_CTRL_RX_EOT_IGNORE			0x00000010
+
+#define REG_DSI_LANE_SWAP_CTRL					0x000000ac
+
+#define REG_DSI_ERR_INT_MASK0					0x00000108
+
+#define REG_DSI_INTR_CTRL					0x0000010c
+
+#define REG_DSI_RESET						0x00000114
+
+#define REG_DSI_CLK_CTRL					0x00000118
+
+#define REG_DSI_PHY_RESET					0x00000128
+
+#define REG_DSI_PHY_PLL_CTRL_0					0x00000200
+#define DSI_PHY_PLL_CTRL_0_ENABLE				0x00000001
+
+#define REG_DSI_PHY_PLL_CTRL_1					0x00000204
+
+#define REG_DSI_PHY_PLL_CTRL_2					0x00000208
+
+#define REG_DSI_PHY_PLL_CTRL_3					0x0000020c
+
+#define REG_DSI_PHY_PLL_CTRL_4					0x00000210
+
+#define REG_DSI_PHY_PLL_CTRL_5					0x00000214
+
+#define REG_DSI_PHY_PLL_CTRL_6					0x00000218
+
+#define REG_DSI_PHY_PLL_CTRL_7					0x0000021c
+
+#define REG_DSI_PHY_PLL_CTRL_8					0x00000220
+
+#define REG_DSI_PHY_PLL_CTRL_9					0x00000224
+
+#define REG_DSI_PHY_PLL_CTRL_10					0x00000228
+
+#define REG_DSI_PHY_PLL_CTRL_11					0x0000022c
+
+#define REG_DSI_PHY_PLL_CTRL_12					0x00000230
+
+#define REG_DSI_PHY_PLL_CTRL_13					0x00000234
+
+#define REG_DSI_PHY_PLL_CTRL_14					0x00000238
+
+#define REG_DSI_PHY_PLL_CTRL_15					0x0000023c
+
+#define REG_DSI_PHY_PLL_CTRL_16					0x00000240
+
+#define REG_DSI_PHY_PLL_CTRL_17					0x00000244
+
+#define REG_DSI_PHY_PLL_CTRL_18					0x00000248
+
+#define REG_DSI_PHY_PLL_CTRL_19					0x0000024c
+
+#define REG_DSI_PHY_PLL_CTRL_20					0x00000250
+
+#define REG_DSI_PHY_PLL_STATUS					0x00000280
+#define DSI_PHY_PLL_STATUS_PLL_BUSY				0x00000001
+
+#define REG_DSI_8x60_PHY_TPA_CTRL_1				0x00000258
+
+#define REG_DSI_8x60_PHY_TPA_CTRL_2				0x0000025c
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_0				0x00000260
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_1				0x00000264
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_2				0x00000268
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_3				0x0000026c
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_4				0x00000270
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_5				0x00000274
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_6				0x00000278
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_7				0x0000027c
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_8				0x00000280
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_9				0x00000284
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_10				0x00000288
+
+#define REG_DSI_8x60_PHY_TIMING_CTRL_11				0x0000028c
+
+#define REG_DSI_8x60_PHY_CTRL_0					0x00000290
+
+#define REG_DSI_8x60_PHY_CTRL_1					0x00000294
+
+#define REG_DSI_8x60_PHY_CTRL_2					0x00000298
+
+#define REG_DSI_8x60_PHY_CTRL_3					0x0000029c
+
+#define REG_DSI_8x60_PHY_STRENGTH_0				0x000002a0
+
+#define REG_DSI_8x60_PHY_STRENGTH_1				0x000002a4
+
+#define REG_DSI_8x60_PHY_STRENGTH_2				0x000002a8
+
+#define REG_DSI_8x60_PHY_STRENGTH_3				0x000002ac
+
+#define REG_DSI_8x60_PHY_REGULATOR_CTRL_0			0x000002cc
+
+#define REG_DSI_8x60_PHY_REGULATOR_CTRL_1			0x000002d0
+
+#define REG_DSI_8x60_PHY_REGULATOR_CTRL_2			0x000002d4
+
+#define REG_DSI_8x60_PHY_REGULATOR_CTRL_3			0x000002d8
+
+#define REG_DSI_8x60_PHY_REGULATOR_CTRL_4			0x000002dc
+
+#define REG_DSI_8x60_PHY_CAL_HW_TRIGGER				0x000000f0
+
+#define REG_DSI_8x60_PHY_CAL_CTRL				0x000000f4
+
+#define REG_DSI_8x60_PHY_CAL_STATUS				0x000000fc
+#define DSI_8x60_PHY_CAL_STATUS_CAL_BUSY			0x10000000
+
+static inline uint32_t REG_DSI_8960_LN(uint32_t i0) { return 0x00000300 + 0x40*i0; }
+
+static inline uint32_t REG_DSI_8960_LN_CFG_0(uint32_t i0) { return 0x00000300 + 0x40*i0; }
+
+static inline uint32_t REG_DSI_8960_LN_CFG_1(uint32_t i0) { return 0x00000304 + 0x40*i0; }
+
+static inline uint32_t REG_DSI_8960_LN_CFG_2(uint32_t i0) { return 0x00000308 + 0x40*i0; }
+
+static inline uint32_t REG_DSI_8960_LN_TEST_DATAPATH(uint32_t i0) { return 0x0000030c + 0x40*i0; }
+
+static inline uint32_t REG_DSI_8960_LN_TEST_STR_0(uint32_t i0) { return 0x00000314 + 0x40*i0; }
+
+static inline uint32_t REG_DSI_8960_LN_TEST_STR_1(uint32_t i0) { return 0x00000318 + 0x40*i0; }
+
+#define REG_DSI_8960_PHY_LNCK_CFG_0				0x00000400
+
+#define REG_DSI_8960_PHY_LNCK_CFG_1				0x00000404
+
+#define REG_DSI_8960_PHY_LNCK_CFG_2				0x00000408
+
+#define REG_DSI_8960_PHY_LNCK_TEST_DATAPATH			0x0000040c
+
+#define REG_DSI_8960_PHY_LNCK_TEST_STR0				0x00000414
+
+#define REG_DSI_8960_PHY_LNCK_TEST_STR1				0x00000418
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_0				0x00000440
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_1				0x00000444
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_2				0x00000448
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_3				0x0000044c
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_4				0x00000450
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_5				0x00000454
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_6				0x00000458
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_7				0x0000045c
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_8				0x00000460
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_9				0x00000464
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_10				0x00000468
+
+#define REG_DSI_8960_PHY_TIMING_CTRL_11				0x0000046c
+
+#define REG_DSI_8960_PHY_CTRL_0					0x00000470
+
+#define REG_DSI_8960_PHY_CTRL_1					0x00000474
+
+#define REG_DSI_8960_PHY_CTRL_2					0x00000478
+
+#define REG_DSI_8960_PHY_CTRL_3					0x0000047c
+
+#define REG_DSI_8960_PHY_STRENGTH_0				0x00000480
+
+#define REG_DSI_8960_PHY_STRENGTH_1				0x00000484
+
+#define REG_DSI_8960_PHY_STRENGTH_2				0x00000488
+
+#define REG_DSI_8960_PHY_BIST_CTRL_0				0x0000048c
+
+#define REG_DSI_8960_PHY_BIST_CTRL_1				0x00000490
+
+#define REG_DSI_8960_PHY_BIST_CTRL_2				0x00000494
+
+#define REG_DSI_8960_PHY_BIST_CTRL_3				0x00000498
+
+#define REG_DSI_8960_PHY_BIST_CTRL_4				0x0000049c
+
+#define REG_DSI_8960_PHY_LDO_CTRL				0x000004b0
+
+#define REG_DSI_8960_PHY_REGULATOR_CTRL_0			0x00000500
+
+#define REG_DSI_8960_PHY_REGULATOR_CTRL_1			0x00000504
+
+#define REG_DSI_8960_PHY_REGULATOR_CTRL_2			0x00000508
+
+#define REG_DSI_8960_PHY_REGULATOR_CTRL_3			0x0000050c
+
+#define REG_DSI_8960_PHY_REGULATOR_CTRL_4			0x00000510
+
+#define REG_DSI_8960_PHY_REGULATOR_CAL_PWR_CFG			0x00000518
+
+#define REG_DSI_8960_PHY_CAL_HW_TRIGGER				0x00000528
+
+#define REG_DSI_8960_PHY_CAL_SW_CFG_0				0x0000052c
+
+#define REG_DSI_8960_PHY_CAL_SW_CFG_1				0x00000530
+
+#define REG_DSI_8960_PHY_CAL_SW_CFG_2				0x00000534
+
+#define REG_DSI_8960_PHY_CAL_HW_CFG_0				0x00000538
+
+#define REG_DSI_8960_PHY_CAL_HW_CFG_1				0x0000053c
+
+#define REG_DSI_8960_PHY_CAL_HW_CFG_2				0x00000540
+
+#define REG_DSI_8960_PHY_CAL_HW_CFG_3				0x00000544
+
+#define REG_DSI_8960_PHY_CAL_HW_CFG_4				0x00000548
+
+#define REG_DSI_8960_PHY_CAL_STATUS				0x00000550
+#define DSI_8960_PHY_CAL_STATUS_CAL_BUSY			0x00000010
+
+
+#endif /* DSI_XML */
diff --git a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h
new file mode 100644
index 0000000..aefc1b8
--- /dev/null
+++ b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h
@@ -0,0 +1,114 @@
+#ifndef MMSS_CC_XML
+#define MMSS_CC_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    595 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp4/mdp4.xml           (  19332 bytes, from 2013-08-16 22:16:36)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  19288 bytes, from 2013-08-11 18:14:15)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum mmss_cc_clk {
+	CLK = 0,
+	PCLK = 1,
+};
+
+#define REG_MMSS_CC_AHB						0x00000008
+
+static inline uint32_t __offset_CLK(enum mmss_cc_clk idx)
+{
+	switch (idx) {
+		case CLK: return 0x0000004c;
+		case PCLK: return 0x00000130;
+		default: return INVALID_IDX(idx);
+	}
+}
+static inline uint32_t REG_MMSS_CC_CLK(enum mmss_cc_clk i0) { return 0x00000000 + __offset_CLK(i0); }
+
+static inline uint32_t REG_MMSS_CC_CLK_CC(enum mmss_cc_clk i0) { return 0x00000000 + __offset_CLK(i0); }
+#define MMSS_CC_CLK_CC_CLK_EN					0x00000001
+#define MMSS_CC_CLK_CC_ROOT_EN					0x00000004
+#define MMSS_CC_CLK_CC_MND_EN					0x00000020
+#define MMSS_CC_CLK_CC_MND_MODE__MASK				0x000000c0
+#define MMSS_CC_CLK_CC_MND_MODE__SHIFT				6
+static inline uint32_t MMSS_CC_CLK_CC_MND_MODE(uint32_t val)
+{
+	return ((val) << MMSS_CC_CLK_CC_MND_MODE__SHIFT) & MMSS_CC_CLK_CC_MND_MODE__MASK;
+}
+#define MMSS_CC_CLK_CC_PMXO_SEL__MASK				0x00000300
+#define MMSS_CC_CLK_CC_PMXO_SEL__SHIFT				8
+static inline uint32_t MMSS_CC_CLK_CC_PMXO_SEL(uint32_t val)
+{
+	return ((val) << MMSS_CC_CLK_CC_PMXO_SEL__SHIFT) & MMSS_CC_CLK_CC_PMXO_SEL__MASK;
+}
+
+static inline uint32_t REG_MMSS_CC_CLK_MD(enum mmss_cc_clk i0) { return 0x00000004 + __offset_CLK(i0); }
+#define MMSS_CC_CLK_MD_D__MASK					0x000000ff
+#define MMSS_CC_CLK_MD_D__SHIFT					0
+static inline uint32_t MMSS_CC_CLK_MD_D(uint32_t val)
+{
+	return ((val) << MMSS_CC_CLK_MD_D__SHIFT) & MMSS_CC_CLK_MD_D__MASK;
+}
+#define MMSS_CC_CLK_MD_M__MASK					0x0000ff00
+#define MMSS_CC_CLK_MD_M__SHIFT					8
+static inline uint32_t MMSS_CC_CLK_MD_M(uint32_t val)
+{
+	return ((val) << MMSS_CC_CLK_MD_M__SHIFT) & MMSS_CC_CLK_MD_M__MASK;
+}
+
+static inline uint32_t REG_MMSS_CC_CLK_NS(enum mmss_cc_clk i0) { return 0x00000008 + __offset_CLK(i0); }
+#define MMSS_CC_CLK_NS_SRC__MASK				0x0000000f
+#define MMSS_CC_CLK_NS_SRC__SHIFT				0
+static inline uint32_t MMSS_CC_CLK_NS_SRC(uint32_t val)
+{
+	return ((val) << MMSS_CC_CLK_NS_SRC__SHIFT) & MMSS_CC_CLK_NS_SRC__MASK;
+}
+#define MMSS_CC_CLK_NS_PRE_DIV_FUNC__MASK			0x00fff000
+#define MMSS_CC_CLK_NS_PRE_DIV_FUNC__SHIFT			12
+static inline uint32_t MMSS_CC_CLK_NS_PRE_DIV_FUNC(uint32_t val)
+{
+	return ((val) << MMSS_CC_CLK_NS_PRE_DIV_FUNC__SHIFT) & MMSS_CC_CLK_NS_PRE_DIV_FUNC__MASK;
+}
+#define MMSS_CC_CLK_NS_VAL__MASK				0xff000000
+#define MMSS_CC_CLK_NS_VAL__SHIFT				24
+static inline uint32_t MMSS_CC_CLK_NS_VAL(uint32_t val)
+{
+	return ((val) << MMSS_CC_CLK_NS_VAL__SHIFT) & MMSS_CC_CLK_NS_VAL__MASK;
+}
+
+
+#endif /* MMSS_CC_XML */
diff --git a/drivers/gpu/drm/msm/dsi/sfpb.xml.h b/drivers/gpu/drm/msm/dsi/sfpb.xml.h
new file mode 100644
index 0000000..a225e81
--- /dev/null
+++ b/drivers/gpu/drm/msm/dsi/sfpb.xml.h
@@ -0,0 +1,48 @@
+#ifndef SFPB_XML
+#define SFPB_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    595 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp4/mdp4.xml           (  19332 bytes, from 2013-08-16 22:16:36)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  19288 bytes, from 2013-08-11 18:14:15)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define REG_SFPB_CFG						0x00000058
+
+
+#endif /* SFPB_XML */
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
new file mode 100644
index 0000000..50d11df
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hdmi.h"
+
+static struct platform_device *hdmi_pdev;
+
+void hdmi_set_mode(struct hdmi *hdmi, bool power_on)
+{
+	uint32_t ctrl = 0;
+
+	if (power_on) {
+		ctrl |= HDMI_CTRL_ENABLE;
+		if (!hdmi->hdmi_mode) {
+			ctrl |= HDMI_CTRL_HDMI;
+			hdmi_write(hdmi, REG_HDMI_CTRL, ctrl);
+			ctrl &= ~HDMI_CTRL_HDMI;
+		} else {
+			ctrl |= HDMI_CTRL_HDMI;
+		}
+	} else {
+		ctrl = HDMI_CTRL_HDMI;
+	}
+
+	hdmi_write(hdmi, REG_HDMI_CTRL, ctrl);
+	DBG("HDMI Core: %s, HDMI_CTRL=0x%08x",
+			power_on ? "Enable" : "Disable", ctrl);
+}
+
+static irqreturn_t hdmi_irq(int irq, void *dev_id)
+{
+	struct hdmi *hdmi = dev_id;
+
+	/* Process HPD: */
+	hdmi_connector_irq(hdmi->connector);
+
+	/* Process DDC: */
+	hdmi_i2c_irq(hdmi->i2c);
+
+	/* TODO audio.. */
+
+	return IRQ_HANDLED;
+}
+
+void hdmi_destroy(struct kref *kref)
+{
+	struct hdmi *hdmi = container_of(kref, struct hdmi, refcount);
+	struct hdmi_phy *phy = hdmi->phy;
+
+	if (phy)
+		phy->funcs->destroy(phy);
+
+	if (hdmi->i2c)
+		hdmi_i2c_destroy(hdmi->i2c);
+
+	put_device(&hdmi->pdev->dev);
+}
+
+/* initialize connector */
+int hdmi_init(struct drm_device *dev, struct drm_encoder *encoder)
+{
+	struct hdmi *hdmi = NULL;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct platform_device *pdev = hdmi_pdev;
+	struct hdmi_platform_config *config;
+	int ret;
+
+	if (!pdev) {
+		dev_err(dev->dev, "no hdmi device\n");
+		ret = -ENXIO;
+		goto fail;
+	}
+
+	config = pdev->dev.platform_data;
+
+	hdmi = kzalloc(sizeof(*hdmi), GFP_KERNEL);
+	if (!hdmi) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	kref_init(&hdmi->refcount);
+
+	get_device(&pdev->dev);
+
+	hdmi->dev = dev;
+	hdmi->pdev = pdev;
+	hdmi->encoder = encoder;
+
+	/* not sure about which phy maps to which msm.. probably I miss some */
+	if (config->phy_init)
+		hdmi->phy = config->phy_init(hdmi);
+	else
+		hdmi->phy = ERR_PTR(-ENXIO);
+
+	if (IS_ERR(hdmi->phy)) {
+		ret = PTR_ERR(hdmi->phy);
+		dev_err(dev->dev, "failed to load phy: %d\n", ret);
+		hdmi->phy = NULL;
+		goto fail;
+	}
+
+	hdmi->mmio = msm_ioremap(pdev, "hdmi_msm_hdmi_addr", "HDMI");
+	if (IS_ERR(hdmi->mmio)) {
+		ret = PTR_ERR(hdmi->mmio);
+		goto fail;
+	}
+
+	hdmi->mvs = devm_regulator_get(&pdev->dev, "8901_hdmi_mvs");
+	if (IS_ERR(hdmi->mvs))
+		hdmi->mvs = devm_regulator_get(&pdev->dev, "hdmi_mvs");
+	if (IS_ERR(hdmi->mvs)) {
+		ret = PTR_ERR(hdmi->mvs);
+		dev_err(dev->dev, "failed to get mvs regulator: %d\n", ret);
+		goto fail;
+	}
+
+	hdmi->mpp0 = devm_regulator_get(&pdev->dev, "8901_mpp0");
+	if (IS_ERR(hdmi->mpp0))
+		hdmi->mpp0 = NULL;
+
+	hdmi->clk = devm_clk_get(&pdev->dev, "core_clk");
+	if (IS_ERR(hdmi->clk)) {
+		ret = PTR_ERR(hdmi->clk);
+		dev_err(dev->dev, "failed to get 'clk': %d\n", ret);
+		goto fail;
+	}
+
+	hdmi->m_pclk = devm_clk_get(&pdev->dev, "master_iface_clk");
+	if (IS_ERR(hdmi->m_pclk)) {
+		ret = PTR_ERR(hdmi->m_pclk);
+		dev_err(dev->dev, "failed to get 'm_pclk': %d\n", ret);
+		goto fail;
+	}
+
+	hdmi->s_pclk = devm_clk_get(&pdev->dev, "slave_iface_clk");
+	if (IS_ERR(hdmi->s_pclk)) {
+		ret = PTR_ERR(hdmi->s_pclk);
+		dev_err(dev->dev, "failed to get 's_pclk': %d\n", ret);
+		goto fail;
+	}
+
+	hdmi->i2c = hdmi_i2c_init(hdmi);
+	if (IS_ERR(hdmi->i2c)) {
+		ret = PTR_ERR(hdmi->i2c);
+		dev_err(dev->dev, "failed to get i2c: %d\n", ret);
+		hdmi->i2c = NULL;
+		goto fail;
+	}
+
+	hdmi->bridge = hdmi_bridge_init(hdmi);
+	if (IS_ERR(hdmi->bridge)) {
+		ret = PTR_ERR(hdmi->bridge);
+		dev_err(dev->dev, "failed to create HDMI bridge: %d\n", ret);
+		hdmi->bridge = NULL;
+		goto fail;
+	}
+
+	hdmi->connector = hdmi_connector_init(hdmi);
+	if (IS_ERR(hdmi->connector)) {
+		ret = PTR_ERR(hdmi->connector);
+		dev_err(dev->dev, "failed to create HDMI connector: %d\n", ret);
+		hdmi->connector = NULL;
+		goto fail;
+	}
+
+	hdmi->irq = platform_get_irq(pdev, 0);
+	if (hdmi->irq < 0) {
+		ret = hdmi->irq;
+		dev_err(dev->dev, "failed to get irq: %d\n", ret);
+		goto fail;
+	}
+
+	ret = devm_request_threaded_irq(&pdev->dev, hdmi->irq,
+			NULL, hdmi_irq, IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+			"hdmi_isr", hdmi);
+	if (ret < 0) {
+		dev_err(dev->dev, "failed to request IRQ%u: %d\n",
+				hdmi->irq, ret);
+		goto fail;
+	}
+
+	encoder->bridge = hdmi->bridge;
+
+	priv->bridges[priv->num_bridges++]       = hdmi->bridge;
+	priv->connectors[priv->num_connectors++] = hdmi->connector;
+
+	return 0;
+
+fail:
+	if (hdmi) {
+		/* bridge/connector are normally destroyed by drm: */
+		if (hdmi->bridge)
+			hdmi->bridge->funcs->destroy(hdmi->bridge);
+		if (hdmi->connector)
+			hdmi->connector->funcs->destroy(hdmi->connector);
+		hdmi_destroy(&hdmi->refcount);
+	}
+
+	return ret;
+}
+
+/*
+ * The hdmi device:
+ */
+
+static int hdmi_dev_probe(struct platform_device *pdev)
+{
+	static struct hdmi_platform_config config = {};
+#ifdef CONFIG_OF
+	/* TODO */
+#else
+	if (cpu_is_apq8064()) {
+		config.phy_init      = hdmi_phy_8960_init;
+		config.ddc_clk_gpio  = 70;
+		config.ddc_data_gpio = 71;
+		config.hpd_gpio      = 72;
+		config.pmic_gpio     = 13 + NR_GPIO_IRQS;
+	} else if (cpu_is_msm8960()) {
+		config.phy_init      = hdmi_phy_8960_init;
+		config.ddc_clk_gpio  = 100;
+		config.ddc_data_gpio = 101;
+		config.hpd_gpio      = 102;
+		config.pmic_gpio     = -1;
+	} else if (cpu_is_msm8x60()) {
+		config.phy_init      = hdmi_phy_8x60_init;
+		config.ddc_clk_gpio  = 170;
+		config.ddc_data_gpio = 171;
+		config.hpd_gpio      = 172;
+		config.pmic_gpio     = -1;
+	}
+#endif
+	pdev->dev.platform_data = &config;
+	hdmi_pdev = pdev;
+	return 0;
+}
+
+static int hdmi_dev_remove(struct platform_device *pdev)
+{
+	hdmi_pdev = NULL;
+	return 0;
+}
+
+static struct platform_driver hdmi_driver = {
+	.probe = hdmi_dev_probe,
+	.remove = hdmi_dev_remove,
+	.driver.name = "hdmi_msm",
+};
+
+void __init hdmi_register(void)
+{
+	platform_driver_register(&hdmi_driver);
+}
+
+void __exit hdmi_unregister(void)
+{
+	platform_driver_unregister(&hdmi_driver);
+}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
new file mode 100644
index 0000000..2c2ec56
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HDMI_CONNECTOR_H__
+#define __HDMI_CONNECTOR_H__
+
+#include <linux/i2c.h>
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+
+#include "msm_drv.h"
+#include "hdmi.xml.h"
+
+
+struct hdmi_phy;
+
+struct hdmi {
+	struct kref refcount;
+
+	struct drm_device *dev;
+	struct platform_device *pdev;
+
+	void __iomem *mmio;
+
+	struct regulator *mvs;        /* HDMI_5V */
+	struct regulator *mpp0;       /* External 5V */
+
+	struct clk *clk;
+	struct clk *m_pclk;
+	struct clk *s_pclk;
+
+	struct hdmi_phy *phy;
+	struct i2c_adapter *i2c;
+	struct drm_connector *connector;
+	struct drm_bridge *bridge;
+
+	/* the encoder we are hooked to (outside of hdmi block) */
+	struct drm_encoder *encoder;
+
+	bool hdmi_mode;               /* are we in hdmi mode? */
+
+	int irq;
+};
+
+/* platform config data (ie. from DT, or pdata) */
+struct hdmi_platform_config {
+	struct hdmi_phy *(*phy_init)(struct hdmi *hdmi);
+	int ddc_clk_gpio, ddc_data_gpio, hpd_gpio, pmic_gpio;
+};
+
+void hdmi_set_mode(struct hdmi *hdmi, bool power_on);
+void hdmi_destroy(struct kref *kref);
+
+static inline void hdmi_write(struct hdmi *hdmi, u32 reg, u32 data)
+{
+	msm_writel(data, hdmi->mmio + reg);
+}
+
+static inline u32 hdmi_read(struct hdmi *hdmi, u32 reg)
+{
+	return msm_readl(hdmi->mmio + reg);
+}
+
+static inline struct hdmi * hdmi_reference(struct hdmi *hdmi)
+{
+	kref_get(&hdmi->refcount);
+	return hdmi;
+}
+
+static inline void hdmi_unreference(struct hdmi *hdmi)
+{
+	kref_put(&hdmi->refcount, hdmi_destroy);
+}
+
+/*
+ * The phy appears to be different, for example between 8960 and 8x60,
+ * so split the phy related functions out and load the correct one at
+ * runtime:
+ */
+
+struct hdmi_phy_funcs {
+	void (*destroy)(struct hdmi_phy *phy);
+	void (*reset)(struct hdmi_phy *phy);
+	void (*powerup)(struct hdmi_phy *phy, unsigned long int pixclock);
+	void (*powerdown)(struct hdmi_phy *phy);
+};
+
+struct hdmi_phy {
+	const struct hdmi_phy_funcs *funcs;
+};
+
+struct hdmi_phy *hdmi_phy_8960_init(struct hdmi *hdmi);
+struct hdmi_phy *hdmi_phy_8x60_init(struct hdmi *hdmi);
+
+/*
+ * hdmi bridge:
+ */
+
+struct drm_bridge *hdmi_bridge_init(struct hdmi *hdmi);
+
+/*
+ * hdmi connector:
+ */
+
+void hdmi_connector_irq(struct drm_connector *connector);
+struct drm_connector *hdmi_connector_init(struct hdmi *hdmi);
+
+/*
+ * i2c adapter for ddc:
+ */
+
+void hdmi_i2c_irq(struct i2c_adapter *i2c);
+void hdmi_i2c_destroy(struct i2c_adapter *i2c);
+struct i2c_adapter *hdmi_i2c_init(struct hdmi *hdmi);
+
+#endif /* __HDMI_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h
new file mode 100644
index 0000000..f5fa486
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h
@@ -0,0 +1,508 @@
+#ifndef HDMI_XML
+#define HDMI_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    595 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp4/mdp4.xml           (  19332 bytes, from 2013-08-16 22:16:36)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  19288 bytes, from 2013-08-11 18:14:15)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum hdmi_hdcp_key_state {
+	NO_KEYS = 0,
+	NOT_CHECKED = 1,
+	CHECKING = 2,
+	KEYS_VALID = 3,
+	AKSV_INVALID = 4,
+	CHECKSUM_MISMATCH = 5,
+};
+
+enum hdmi_ddc_read_write {
+	DDC_WRITE = 0,
+	DDC_READ = 1,
+};
+
+enum hdmi_acr_cts {
+	ACR_NONE = 0,
+	ACR_32 = 1,
+	ACR_44 = 2,
+	ACR_48 = 3,
+};
+
+#define REG_HDMI_CTRL						0x00000000
+#define HDMI_CTRL_ENABLE					0x00000001
+#define HDMI_CTRL_HDMI						0x00000002
+#define HDMI_CTRL_ENCRYPTED					0x00000004
+
+#define REG_HDMI_AUDIO_PKT_CTRL1				0x00000020
+#define HDMI_AUDIO_PKT_CTRL1_AUDIO_SAMPLE_SEND			0x00000001
+
+#define REG_HDMI_ACR_PKT_CTRL					0x00000024
+#define HDMI_ACR_PKT_CTRL_CONT					0x00000001
+#define HDMI_ACR_PKT_CTRL_SEND					0x00000002
+#define HDMI_ACR_PKT_CTRL_SELECT__MASK				0x00000030
+#define HDMI_ACR_PKT_CTRL_SELECT__SHIFT				4
+static inline uint32_t HDMI_ACR_PKT_CTRL_SELECT(enum hdmi_acr_cts val)
+{
+	return ((val) << HDMI_ACR_PKT_CTRL_SELECT__SHIFT) & HDMI_ACR_PKT_CTRL_SELECT__MASK;
+}
+#define HDMI_ACR_PKT_CTRL_SOURCE				0x00000100
+#define HDMI_ACR_PKT_CTRL_N_MULTIPLIER__MASK			0x00070000
+#define HDMI_ACR_PKT_CTRL_N_MULTIPLIER__SHIFT			16
+static inline uint32_t HDMI_ACR_PKT_CTRL_N_MULTIPLIER(uint32_t val)
+{
+	return ((val) << HDMI_ACR_PKT_CTRL_N_MULTIPLIER__SHIFT) & HDMI_ACR_PKT_CTRL_N_MULTIPLIER__MASK;
+}
+#define HDMI_ACR_PKT_CTRL_AUDIO_PRIORITY			0x80000000
+
+#define REG_HDMI_VBI_PKT_CTRL					0x00000028
+#define HDMI_VBI_PKT_CTRL_GC_ENABLE				0x00000010
+#define HDMI_VBI_PKT_CTRL_GC_EVERY_FRAME			0x00000020
+#define HDMI_VBI_PKT_CTRL_ISRC_SEND				0x00000100
+#define HDMI_VBI_PKT_CTRL_ISRC_CONTINUOUS			0x00000200
+#define HDMI_VBI_PKT_CTRL_ACP_SEND				0x00001000
+#define HDMI_VBI_PKT_CTRL_ACP_SRC_SW				0x00002000
+
+#define REG_HDMI_INFOFRAME_CTRL0				0x0000002c
+#define HDMI_INFOFRAME_CTRL0_AVI_SEND				0x00000001
+#define HDMI_INFOFRAME_CTRL0_AVI_CONT				0x00000002
+#define HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SEND			0x00000010
+#define HDMI_INFOFRAME_CTRL0_AUDIO_INFO_CONT			0x00000020
+#define HDMI_INFOFRAME_CTRL0_AUDIO_INFO_SOURCE			0x00000040
+#define HDMI_INFOFRAME_CTRL0_AUDIO_INFO_UPDATE			0x00000080
+
+#define REG_HDMI_GEN_PKT_CTRL					0x00000034
+#define HDMI_GEN_PKT_CTRL_GENERIC0_SEND				0x00000001
+#define HDMI_GEN_PKT_CTRL_GENERIC0_CONT				0x00000002
+#define HDMI_GEN_PKT_CTRL_GENERIC0_UPDATE__MASK			0x0000000c
+#define HDMI_GEN_PKT_CTRL_GENERIC0_UPDATE__SHIFT		2
+static inline uint32_t HDMI_GEN_PKT_CTRL_GENERIC0_UPDATE(uint32_t val)
+{
+	return ((val) << HDMI_GEN_PKT_CTRL_GENERIC0_UPDATE__SHIFT) & HDMI_GEN_PKT_CTRL_GENERIC0_UPDATE__MASK;
+}
+#define HDMI_GEN_PKT_CTRL_GENERIC1_SEND				0x00000010
+#define HDMI_GEN_PKT_CTRL_GENERIC1_CONT				0x00000020
+#define HDMI_GEN_PKT_CTRL_GENERIC0_LINE__MASK			0x003f0000
+#define HDMI_GEN_PKT_CTRL_GENERIC0_LINE__SHIFT			16
+static inline uint32_t HDMI_GEN_PKT_CTRL_GENERIC0_LINE(uint32_t val)
+{
+	return ((val) << HDMI_GEN_PKT_CTRL_GENERIC0_LINE__SHIFT) & HDMI_GEN_PKT_CTRL_GENERIC0_LINE__MASK;
+}
+#define HDMI_GEN_PKT_CTRL_GENERIC1_LINE__MASK			0x3f000000
+#define HDMI_GEN_PKT_CTRL_GENERIC1_LINE__SHIFT			24
+static inline uint32_t HDMI_GEN_PKT_CTRL_GENERIC1_LINE(uint32_t val)
+{
+	return ((val) << HDMI_GEN_PKT_CTRL_GENERIC1_LINE__SHIFT) & HDMI_GEN_PKT_CTRL_GENERIC1_LINE__MASK;
+}
+
+#define REG_HDMI_GC						0x00000040
+#define HDMI_GC_MUTE						0x00000001
+
+#define REG_HDMI_AUDIO_PKT_CTRL2				0x00000044
+#define HDMI_AUDIO_PKT_CTRL2_OVERRIDE				0x00000001
+#define HDMI_AUDIO_PKT_CTRL2_LAYOUT				0x00000002
+
+static inline uint32_t REG_HDMI_AVI_INFO(uint32_t i0) { return 0x0000006c + 0x4*i0; }
+
+#define REG_HDMI_GENERIC0_HDR					0x00000084
+
+static inline uint32_t REG_HDMI_GENERIC0(uint32_t i0) { return 0x00000088 + 0x4*i0; }
+
+#define REG_HDMI_GENERIC1_HDR					0x000000a4
+
+static inline uint32_t REG_HDMI_GENERIC1(uint32_t i0) { return 0x000000a8 + 0x4*i0; }
+
+static inline uint32_t REG_HDMI_ACR(uint32_t i0) { return 0x000000c4 + 0x8*i0; }
+
+static inline uint32_t REG_HDMI_ACR_0(uint32_t i0) { return 0x000000c4 + 0x8*i0; }
+#define HDMI_ACR_0_CTS__MASK					0xfffff000
+#define HDMI_ACR_0_CTS__SHIFT					12
+static inline uint32_t HDMI_ACR_0_CTS(uint32_t val)
+{
+	return ((val) << HDMI_ACR_0_CTS__SHIFT) & HDMI_ACR_0_CTS__MASK;
+}
+
+static inline uint32_t REG_HDMI_ACR_1(uint32_t i0) { return 0x000000c8 + 0x8*i0; }
+#define HDMI_ACR_1_N__MASK					0xffffffff
+#define HDMI_ACR_1_N__SHIFT					0
+static inline uint32_t HDMI_ACR_1_N(uint32_t val)
+{
+	return ((val) << HDMI_ACR_1_N__SHIFT) & HDMI_ACR_1_N__MASK;
+}
+
+#define REG_HDMI_AUDIO_INFO0					0x000000e4
+#define HDMI_AUDIO_INFO0_CHECKSUM__MASK				0x000000ff
+#define HDMI_AUDIO_INFO0_CHECKSUM__SHIFT			0
+static inline uint32_t HDMI_AUDIO_INFO0_CHECKSUM(uint32_t val)
+{
+	return ((val) << HDMI_AUDIO_INFO0_CHECKSUM__SHIFT) & HDMI_AUDIO_INFO0_CHECKSUM__MASK;
+}
+#define HDMI_AUDIO_INFO0_CC__MASK				0x00000700
+#define HDMI_AUDIO_INFO0_CC__SHIFT				8
+static inline uint32_t HDMI_AUDIO_INFO0_CC(uint32_t val)
+{
+	return ((val) << HDMI_AUDIO_INFO0_CC__SHIFT) & HDMI_AUDIO_INFO0_CC__MASK;
+}
+
+#define REG_HDMI_AUDIO_INFO1					0x000000e8
+#define HDMI_AUDIO_INFO1_CA__MASK				0x000000ff
+#define HDMI_AUDIO_INFO1_CA__SHIFT				0
+static inline uint32_t HDMI_AUDIO_INFO1_CA(uint32_t val)
+{
+	return ((val) << HDMI_AUDIO_INFO1_CA__SHIFT) & HDMI_AUDIO_INFO1_CA__MASK;
+}
+#define HDMI_AUDIO_INFO1_LSV__MASK				0x00007800
+#define HDMI_AUDIO_INFO1_LSV__SHIFT				11
+static inline uint32_t HDMI_AUDIO_INFO1_LSV(uint32_t val)
+{
+	return ((val) << HDMI_AUDIO_INFO1_LSV__SHIFT) & HDMI_AUDIO_INFO1_LSV__MASK;
+}
+#define HDMI_AUDIO_INFO1_DM_INH					0x00008000
+
+#define REG_HDMI_HDCP_CTRL					0x00000110
+#define HDMI_HDCP_CTRL_ENABLE					0x00000001
+#define HDMI_HDCP_CTRL_ENCRYPTION_ENABLE			0x00000100
+
+#define REG_HDMI_HDCP_INT_CTRL					0x00000118
+
+#define REG_HDMI_HDCP_LINK0_STATUS				0x0000011c
+#define HDMI_HDCP_LINK0_STATUS_AN_0_READY			0x00000100
+#define HDMI_HDCP_LINK0_STATUS_AN_1_READY			0x00000200
+#define HDMI_HDCP_LINK0_STATUS_KEY_STATE__MASK			0x70000000
+#define HDMI_HDCP_LINK0_STATUS_KEY_STATE__SHIFT			28
+static inline uint32_t HDMI_HDCP_LINK0_STATUS_KEY_STATE(enum hdmi_hdcp_key_state val)
+{
+	return ((val) << HDMI_HDCP_LINK0_STATUS_KEY_STATE__SHIFT) & HDMI_HDCP_LINK0_STATUS_KEY_STATE__MASK;
+}
+
+#define REG_HDMI_HDCP_RESET					0x00000130
+#define HDMI_HDCP_RESET_LINK0_DEAUTHENTICATE			0x00000001
+
+#define REG_HDMI_AUDIO_CFG					0x000001d0
+#define HDMI_AUDIO_CFG_ENGINE_ENABLE				0x00000001
+#define HDMI_AUDIO_CFG_FIFO_WATERMARK__MASK			0x000000f0
+#define HDMI_AUDIO_CFG_FIFO_WATERMARK__SHIFT			4
+static inline uint32_t HDMI_AUDIO_CFG_FIFO_WATERMARK(uint32_t val)
+{
+	return ((val) << HDMI_AUDIO_CFG_FIFO_WATERMARK__SHIFT) & HDMI_AUDIO_CFG_FIFO_WATERMARK__MASK;
+}
+
+#define REG_HDMI_USEC_REFTIMER					0x00000208
+
+#define REG_HDMI_DDC_CTRL					0x0000020c
+#define HDMI_DDC_CTRL_GO					0x00000001
+#define HDMI_DDC_CTRL_SOFT_RESET				0x00000002
+#define HDMI_DDC_CTRL_SEND_RESET				0x00000004
+#define HDMI_DDC_CTRL_SW_STATUS_RESET				0x00000008
+#define HDMI_DDC_CTRL_TRANSACTION_CNT__MASK			0x00300000
+#define HDMI_DDC_CTRL_TRANSACTION_CNT__SHIFT			20
+static inline uint32_t HDMI_DDC_CTRL_TRANSACTION_CNT(uint32_t val)
+{
+	return ((val) << HDMI_DDC_CTRL_TRANSACTION_CNT__SHIFT) & HDMI_DDC_CTRL_TRANSACTION_CNT__MASK;
+}
+
+#define REG_HDMI_DDC_INT_CTRL					0x00000214
+#define HDMI_DDC_INT_CTRL_SW_DONE_INT				0x00000001
+#define HDMI_DDC_INT_CTRL_SW_DONE_ACK				0x00000002
+#define HDMI_DDC_INT_CTRL_SW_DONE_MASK				0x00000004
+
+#define REG_HDMI_DDC_SW_STATUS					0x00000218
+#define HDMI_DDC_SW_STATUS_NACK0				0x00001000
+#define HDMI_DDC_SW_STATUS_NACK1				0x00002000
+#define HDMI_DDC_SW_STATUS_NACK2				0x00004000
+#define HDMI_DDC_SW_STATUS_NACK3				0x00008000
+
+#define REG_HDMI_DDC_HW_STATUS					0x0000021c
+
+#define REG_HDMI_DDC_SPEED					0x00000220
+#define HDMI_DDC_SPEED_THRESHOLD__MASK				0x00000003
+#define HDMI_DDC_SPEED_THRESHOLD__SHIFT				0
+static inline uint32_t HDMI_DDC_SPEED_THRESHOLD(uint32_t val)
+{
+	return ((val) << HDMI_DDC_SPEED_THRESHOLD__SHIFT) & HDMI_DDC_SPEED_THRESHOLD__MASK;
+}
+#define HDMI_DDC_SPEED_PRESCALE__MASK				0xffff0000
+#define HDMI_DDC_SPEED_PRESCALE__SHIFT				16
+static inline uint32_t HDMI_DDC_SPEED_PRESCALE(uint32_t val)
+{
+	return ((val) << HDMI_DDC_SPEED_PRESCALE__SHIFT) & HDMI_DDC_SPEED_PRESCALE__MASK;
+}
+
+#define REG_HDMI_DDC_SETUP					0x00000224
+#define HDMI_DDC_SETUP_TIMEOUT__MASK				0xff000000
+#define HDMI_DDC_SETUP_TIMEOUT__SHIFT				24
+static inline uint32_t HDMI_DDC_SETUP_TIMEOUT(uint32_t val)
+{
+	return ((val) << HDMI_DDC_SETUP_TIMEOUT__SHIFT) & HDMI_DDC_SETUP_TIMEOUT__MASK;
+}
+
+static inline uint32_t REG_HDMI_I2C_TRANSACTION(uint32_t i0) { return 0x00000228 + 0x4*i0; }
+
+static inline uint32_t REG_HDMI_I2C_TRANSACTION_REG(uint32_t i0) { return 0x00000228 + 0x4*i0; }
+#define HDMI_I2C_TRANSACTION_REG_RW__MASK			0x00000001
+#define HDMI_I2C_TRANSACTION_REG_RW__SHIFT			0
+static inline uint32_t HDMI_I2C_TRANSACTION_REG_RW(enum hdmi_ddc_read_write val)
+{
+	return ((val) << HDMI_I2C_TRANSACTION_REG_RW__SHIFT) & HDMI_I2C_TRANSACTION_REG_RW__MASK;
+}
+#define HDMI_I2C_TRANSACTION_REG_STOP_ON_NACK			0x00000100
+#define HDMI_I2C_TRANSACTION_REG_START				0x00001000
+#define HDMI_I2C_TRANSACTION_REG_STOP				0x00002000
+#define HDMI_I2C_TRANSACTION_REG_CNT__MASK			0x00ff0000
+#define HDMI_I2C_TRANSACTION_REG_CNT__SHIFT			16
+static inline uint32_t HDMI_I2C_TRANSACTION_REG_CNT(uint32_t val)
+{
+	return ((val) << HDMI_I2C_TRANSACTION_REG_CNT__SHIFT) & HDMI_I2C_TRANSACTION_REG_CNT__MASK;
+}
+
+#define REG_HDMI_DDC_DATA					0x00000238
+#define HDMI_DDC_DATA_DATA_RW__MASK				0x00000001
+#define HDMI_DDC_DATA_DATA_RW__SHIFT				0
+static inline uint32_t HDMI_DDC_DATA_DATA_RW(enum hdmi_ddc_read_write val)
+{
+	return ((val) << HDMI_DDC_DATA_DATA_RW__SHIFT) & HDMI_DDC_DATA_DATA_RW__MASK;
+}
+#define HDMI_DDC_DATA_DATA__MASK				0x0000ff00
+#define HDMI_DDC_DATA_DATA__SHIFT				8
+static inline uint32_t HDMI_DDC_DATA_DATA(uint32_t val)
+{
+	return ((val) << HDMI_DDC_DATA_DATA__SHIFT) & HDMI_DDC_DATA_DATA__MASK;
+}
+#define HDMI_DDC_DATA_INDEX__MASK				0x00ff0000
+#define HDMI_DDC_DATA_INDEX__SHIFT				16
+static inline uint32_t HDMI_DDC_DATA_INDEX(uint32_t val)
+{
+	return ((val) << HDMI_DDC_DATA_INDEX__SHIFT) & HDMI_DDC_DATA_INDEX__MASK;
+}
+#define HDMI_DDC_DATA_INDEX_WRITE				0x80000000
+
+#define REG_HDMI_HPD_INT_STATUS					0x00000250
+#define HDMI_HPD_INT_STATUS_INT					0x00000001
+#define HDMI_HPD_INT_STATUS_CABLE_DETECTED			0x00000002
+
+#define REG_HDMI_HPD_INT_CTRL					0x00000254
+#define HDMI_HPD_INT_CTRL_INT_ACK				0x00000001
+#define HDMI_HPD_INT_CTRL_INT_CONNECT				0x00000002
+#define HDMI_HPD_INT_CTRL_INT_EN				0x00000004
+#define HDMI_HPD_INT_CTRL_RX_INT_ACK				0x00000010
+#define HDMI_HPD_INT_CTRL_RX_INT_EN				0x00000020
+#define HDMI_HPD_INT_CTRL_RCV_PLUGIN_DET_MASK			0x00000200
+
+#define REG_HDMI_HPD_CTRL					0x00000258
+#define HDMI_HPD_CTRL_TIMEOUT__MASK				0x00001fff
+#define HDMI_HPD_CTRL_TIMEOUT__SHIFT				0
+static inline uint32_t HDMI_HPD_CTRL_TIMEOUT(uint32_t val)
+{
+	return ((val) << HDMI_HPD_CTRL_TIMEOUT__SHIFT) & HDMI_HPD_CTRL_TIMEOUT__MASK;
+}
+#define HDMI_HPD_CTRL_ENABLE					0x10000000
+
+#define REG_HDMI_DDC_REF					0x0000027c
+#define HDMI_DDC_REF_REFTIMER_ENABLE				0x00010000
+#define HDMI_DDC_REF_REFTIMER__MASK				0x0000ffff
+#define HDMI_DDC_REF_REFTIMER__SHIFT				0
+static inline uint32_t HDMI_DDC_REF_REFTIMER(uint32_t val)
+{
+	return ((val) << HDMI_DDC_REF_REFTIMER__SHIFT) & HDMI_DDC_REF_REFTIMER__MASK;
+}
+
+#define REG_HDMI_ACTIVE_HSYNC					0x000002b4
+#define HDMI_ACTIVE_HSYNC_START__MASK				0x00000fff
+#define HDMI_ACTIVE_HSYNC_START__SHIFT				0
+static inline uint32_t HDMI_ACTIVE_HSYNC_START(uint32_t val)
+{
+	return ((val) << HDMI_ACTIVE_HSYNC_START__SHIFT) & HDMI_ACTIVE_HSYNC_START__MASK;
+}
+#define HDMI_ACTIVE_HSYNC_END__MASK				0x0fff0000
+#define HDMI_ACTIVE_HSYNC_END__SHIFT				16
+static inline uint32_t HDMI_ACTIVE_HSYNC_END(uint32_t val)
+{
+	return ((val) << HDMI_ACTIVE_HSYNC_END__SHIFT) & HDMI_ACTIVE_HSYNC_END__MASK;
+}
+
+#define REG_HDMI_ACTIVE_VSYNC					0x000002b8
+#define HDMI_ACTIVE_VSYNC_START__MASK				0x00000fff
+#define HDMI_ACTIVE_VSYNC_START__SHIFT				0
+static inline uint32_t HDMI_ACTIVE_VSYNC_START(uint32_t val)
+{
+	return ((val) << HDMI_ACTIVE_VSYNC_START__SHIFT) & HDMI_ACTIVE_VSYNC_START__MASK;
+}
+#define HDMI_ACTIVE_VSYNC_END__MASK				0x0fff0000
+#define HDMI_ACTIVE_VSYNC_END__SHIFT				16
+static inline uint32_t HDMI_ACTIVE_VSYNC_END(uint32_t val)
+{
+	return ((val) << HDMI_ACTIVE_VSYNC_END__SHIFT) & HDMI_ACTIVE_VSYNC_END__MASK;
+}
+
+#define REG_HDMI_VSYNC_ACTIVE_F2				0x000002bc
+#define HDMI_VSYNC_ACTIVE_F2_START__MASK			0x00000fff
+#define HDMI_VSYNC_ACTIVE_F2_START__SHIFT			0
+static inline uint32_t HDMI_VSYNC_ACTIVE_F2_START(uint32_t val)
+{
+	return ((val) << HDMI_VSYNC_ACTIVE_F2_START__SHIFT) & HDMI_VSYNC_ACTIVE_F2_START__MASK;
+}
+#define HDMI_VSYNC_ACTIVE_F2_END__MASK				0x0fff0000
+#define HDMI_VSYNC_ACTIVE_F2_END__SHIFT				16
+static inline uint32_t HDMI_VSYNC_ACTIVE_F2_END(uint32_t val)
+{
+	return ((val) << HDMI_VSYNC_ACTIVE_F2_END__SHIFT) & HDMI_VSYNC_ACTIVE_F2_END__MASK;
+}
+
+#define REG_HDMI_TOTAL						0x000002c0
+#define HDMI_TOTAL_H_TOTAL__MASK				0x00000fff
+#define HDMI_TOTAL_H_TOTAL__SHIFT				0
+static inline uint32_t HDMI_TOTAL_H_TOTAL(uint32_t val)
+{
+	return ((val) << HDMI_TOTAL_H_TOTAL__SHIFT) & HDMI_TOTAL_H_TOTAL__MASK;
+}
+#define HDMI_TOTAL_V_TOTAL__MASK				0x0fff0000
+#define HDMI_TOTAL_V_TOTAL__SHIFT				16
+static inline uint32_t HDMI_TOTAL_V_TOTAL(uint32_t val)
+{
+	return ((val) << HDMI_TOTAL_V_TOTAL__SHIFT) & HDMI_TOTAL_V_TOTAL__MASK;
+}
+
+#define REG_HDMI_VSYNC_TOTAL_F2					0x000002c4
+#define HDMI_VSYNC_TOTAL_F2_V_TOTAL__MASK			0x00000fff
+#define HDMI_VSYNC_TOTAL_F2_V_TOTAL__SHIFT			0
+static inline uint32_t HDMI_VSYNC_TOTAL_F2_V_TOTAL(uint32_t val)
+{
+	return ((val) << HDMI_VSYNC_TOTAL_F2_V_TOTAL__SHIFT) & HDMI_VSYNC_TOTAL_F2_V_TOTAL__MASK;
+}
+
+#define REG_HDMI_FRAME_CTRL					0x000002c8
+#define HDMI_FRAME_CTRL_RGB_MUX_SEL_BGR				0x00001000
+#define HDMI_FRAME_CTRL_VSYNC_LOW				0x10000000
+#define HDMI_FRAME_CTRL_HSYNC_LOW				0x20000000
+#define HDMI_FRAME_CTRL_INTERLACED_EN				0x80000000
+
+#define REG_HDMI_PHY_CTRL					0x000002d4
+#define HDMI_PHY_CTRL_SW_RESET_PLL				0x00000001
+#define HDMI_PHY_CTRL_SW_RESET_PLL_LOW				0x00000002
+#define HDMI_PHY_CTRL_SW_RESET					0x00000004
+#define HDMI_PHY_CTRL_SW_RESET_LOW				0x00000008
+
+#define REG_HDMI_AUD_INT					0x000002cc
+#define HDMI_AUD_INT_AUD_FIFO_URUN_INT				0x00000001
+#define HDMI_AUD_INT_AUD_FIFO_URAN_MASK				0x00000002
+#define HDMI_AUD_INT_AUD_SAM_DROP_INT				0x00000004
+#define HDMI_AUD_INT_AUD_SAM_DROP_MASK				0x00000008
+
+#define REG_HDMI_8x60_PHY_REG0					0x00000300
+#define HDMI_8x60_PHY_REG0_DESER_DEL_CTRL__MASK			0x0000001c
+#define HDMI_8x60_PHY_REG0_DESER_DEL_CTRL__SHIFT		2
+static inline uint32_t HDMI_8x60_PHY_REG0_DESER_DEL_CTRL(uint32_t val)
+{
+	return ((val) << HDMI_8x60_PHY_REG0_DESER_DEL_CTRL__SHIFT) & HDMI_8x60_PHY_REG0_DESER_DEL_CTRL__MASK;
+}
+
+#define REG_HDMI_8x60_PHY_REG1					0x00000304
+#define HDMI_8x60_PHY_REG1_DTEST_MUX_SEL__MASK			0x000000f0
+#define HDMI_8x60_PHY_REG1_DTEST_MUX_SEL__SHIFT			4
+static inline uint32_t HDMI_8x60_PHY_REG1_DTEST_MUX_SEL(uint32_t val)
+{
+	return ((val) << HDMI_8x60_PHY_REG1_DTEST_MUX_SEL__SHIFT) & HDMI_8x60_PHY_REG1_DTEST_MUX_SEL__MASK;
+}
+#define HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL__MASK		0x0000000f
+#define HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL__SHIFT		0
+static inline uint32_t HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(uint32_t val)
+{
+	return ((val) << HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL__SHIFT) & HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL__MASK;
+}
+
+#define REG_HDMI_8x60_PHY_REG2					0x00000308
+#define HDMI_8x60_PHY_REG2_PD_DESER				0x00000001
+#define HDMI_8x60_PHY_REG2_PD_DRIVE_1				0x00000002
+#define HDMI_8x60_PHY_REG2_PD_DRIVE_2				0x00000004
+#define HDMI_8x60_PHY_REG2_PD_DRIVE_3				0x00000008
+#define HDMI_8x60_PHY_REG2_PD_DRIVE_4				0x00000010
+#define HDMI_8x60_PHY_REG2_PD_PLL				0x00000020
+#define HDMI_8x60_PHY_REG2_PD_PWRGEN				0x00000040
+#define HDMI_8x60_PHY_REG2_RCV_SENSE_EN				0x00000080
+
+#define REG_HDMI_8x60_PHY_REG3					0x0000030c
+#define HDMI_8x60_PHY_REG3_PLL_ENABLE				0x00000001
+
+#define REG_HDMI_8x60_PHY_REG4					0x00000310
+
+#define REG_HDMI_8x60_PHY_REG5					0x00000314
+
+#define REG_HDMI_8x60_PHY_REG6					0x00000318
+
+#define REG_HDMI_8x60_PHY_REG7					0x0000031c
+
+#define REG_HDMI_8x60_PHY_REG8					0x00000320
+
+#define REG_HDMI_8x60_PHY_REG9					0x00000324
+
+#define REG_HDMI_8x60_PHY_REG10					0x00000328
+
+#define REG_HDMI_8x60_PHY_REG11					0x0000032c
+
+#define REG_HDMI_8x60_PHY_REG12					0x00000330
+#define HDMI_8x60_PHY_REG12_RETIMING_EN				0x00000001
+#define HDMI_8x60_PHY_REG12_PLL_LOCK_DETECT_EN			0x00000002
+#define HDMI_8x60_PHY_REG12_FORCE_LOCK				0x00000010
+
+#define REG_HDMI_8960_PHY_REG0					0x00000400
+
+#define REG_HDMI_8960_PHY_REG1					0x00000404
+
+#define REG_HDMI_8960_PHY_REG2					0x00000408
+
+#define REG_HDMI_8960_PHY_REG3					0x0000040c
+
+#define REG_HDMI_8960_PHY_REG4					0x00000410
+
+#define REG_HDMI_8960_PHY_REG5					0x00000414
+
+#define REG_HDMI_8960_PHY_REG6					0x00000418
+
+#define REG_HDMI_8960_PHY_REG7					0x0000041c
+
+#define REG_HDMI_8960_PHY_REG8					0x00000420
+
+#define REG_HDMI_8960_PHY_REG9					0x00000424
+
+#define REG_HDMI_8960_PHY_REG10					0x00000428
+
+#define REG_HDMI_8960_PHY_REG11					0x0000042c
+
+#define REG_HDMI_8960_PHY_REG12					0x00000430
+
+
+#endif /* HDMI_XML */
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
new file mode 100644
index 0000000..5a8ee34
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_bridge.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hdmi.h"
+
+struct hdmi_bridge {
+	struct drm_bridge base;
+
+	struct hdmi *hdmi;
+
+	unsigned long int pixclock;
+};
+#define to_hdmi_bridge(x) container_of(x, struct hdmi_bridge, base)
+
+static void hdmi_bridge_destroy(struct drm_bridge *bridge)
+{
+	struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
+	hdmi_unreference(hdmi_bridge->hdmi);
+	drm_bridge_cleanup(bridge);
+	kfree(hdmi_bridge);
+}
+
+static void hdmi_bridge_pre_enable(struct drm_bridge *bridge)
+{
+	struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
+	struct hdmi *hdmi = hdmi_bridge->hdmi;
+	struct hdmi_phy *phy = hdmi->phy;
+
+	DBG("power up");
+	phy->funcs->powerup(phy, hdmi_bridge->pixclock);
+	hdmi_set_mode(hdmi, true);
+}
+
+static void hdmi_bridge_enable(struct drm_bridge *bridge)
+{
+}
+
+static void hdmi_bridge_disable(struct drm_bridge *bridge)
+{
+}
+
+static void hdmi_bridge_post_disable(struct drm_bridge *bridge)
+{
+	struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
+	struct hdmi *hdmi = hdmi_bridge->hdmi;
+	struct hdmi_phy *phy = hdmi->phy;
+
+	DBG("power down");
+	hdmi_set_mode(hdmi, false);
+	phy->funcs->powerdown(phy);
+}
+
+static void hdmi_bridge_mode_set(struct drm_bridge *bridge,
+		 struct drm_display_mode *mode,
+		 struct drm_display_mode *adjusted_mode)
+{
+	struct hdmi_bridge *hdmi_bridge = to_hdmi_bridge(bridge);
+	struct hdmi *hdmi = hdmi_bridge->hdmi;
+	int hstart, hend, vstart, vend;
+	uint32_t frame_ctrl;
+
+	mode = adjusted_mode;
+
+	hdmi_bridge->pixclock = mode->clock * 1000;
+
+	hdmi->hdmi_mode = drm_match_cea_mode(mode) > 1;
+
+	hstart = mode->htotal - mode->hsync_start;
+	hend   = mode->htotal - mode->hsync_start + mode->hdisplay;
+
+	vstart = mode->vtotal - mode->vsync_start - 1;
+	vend   = mode->vtotal - mode->vsync_start + mode->vdisplay - 1;
+
+	DBG("htotal=%d, vtotal=%d, hstart=%d, hend=%d, vstart=%d, vend=%d",
+			mode->htotal, mode->vtotal, hstart, hend, vstart, vend);
+
+	hdmi_write(hdmi, REG_HDMI_TOTAL,
+			HDMI_TOTAL_H_TOTAL(mode->htotal - 1) |
+			HDMI_TOTAL_V_TOTAL(mode->vtotal - 1));
+
+	hdmi_write(hdmi, REG_HDMI_ACTIVE_HSYNC,
+			HDMI_ACTIVE_HSYNC_START(hstart) |
+			HDMI_ACTIVE_HSYNC_END(hend));
+	hdmi_write(hdmi, REG_HDMI_ACTIVE_VSYNC,
+			HDMI_ACTIVE_VSYNC_START(vstart) |
+			HDMI_ACTIVE_VSYNC_END(vend));
+
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+		hdmi_write(hdmi, REG_HDMI_VSYNC_TOTAL_F2,
+				HDMI_VSYNC_TOTAL_F2_V_TOTAL(mode->vtotal));
+		hdmi_write(hdmi, REG_HDMI_VSYNC_ACTIVE_F2,
+				HDMI_VSYNC_ACTIVE_F2_START(vstart + 1) |
+				HDMI_VSYNC_ACTIVE_F2_END(vend + 1));
+	} else {
+		hdmi_write(hdmi, REG_HDMI_VSYNC_TOTAL_F2,
+				HDMI_VSYNC_TOTAL_F2_V_TOTAL(0));
+		hdmi_write(hdmi, REG_HDMI_VSYNC_ACTIVE_F2,
+				HDMI_VSYNC_ACTIVE_F2_START(0) |
+				HDMI_VSYNC_ACTIVE_F2_END(0));
+	}
+
+	frame_ctrl = 0;
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		frame_ctrl |= HDMI_FRAME_CTRL_HSYNC_LOW;
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		frame_ctrl |= HDMI_FRAME_CTRL_VSYNC_LOW;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		frame_ctrl |= HDMI_FRAME_CTRL_INTERLACED_EN;
+	DBG("frame_ctrl=%08x", frame_ctrl);
+	hdmi_write(hdmi, REG_HDMI_FRAME_CTRL, frame_ctrl);
+
+	// TODO until we have audio, this might be safest:
+	if (hdmi->hdmi_mode)
+		hdmi_write(hdmi, REG_HDMI_GC, HDMI_GC_MUTE);
+}
+
+static const struct drm_bridge_funcs hdmi_bridge_funcs = {
+		.pre_enable = hdmi_bridge_pre_enable,
+		.enable = hdmi_bridge_enable,
+		.disable = hdmi_bridge_disable,
+		.post_disable = hdmi_bridge_post_disable,
+		.mode_set = hdmi_bridge_mode_set,
+		.destroy = hdmi_bridge_destroy,
+};
+
+
+/* initialize bridge */
+struct drm_bridge *hdmi_bridge_init(struct hdmi *hdmi)
+{
+	struct drm_bridge *bridge = NULL;
+	struct hdmi_bridge *hdmi_bridge;
+	int ret;
+
+	hdmi_bridge = kzalloc(sizeof(*hdmi_bridge), GFP_KERNEL);
+	if (!hdmi_bridge) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	hdmi_bridge->hdmi = hdmi_reference(hdmi);
+
+	bridge = &hdmi_bridge->base;
+
+	drm_bridge_init(hdmi->dev, bridge, &hdmi_bridge_funcs);
+
+	return bridge;
+
+fail:
+	if (bridge)
+		hdmi_bridge_destroy(bridge);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
new file mode 100644
index 0000000..823eee5
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
@@ -0,0 +1,367 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/gpio.h>
+
+#include "hdmi.h"
+
+struct hdmi_connector {
+	struct drm_connector base;
+	struct hdmi *hdmi;
+};
+#define to_hdmi_connector(x) container_of(x, struct hdmi_connector, base)
+
+static int gpio_config(struct hdmi *hdmi, bool on)
+{
+	struct drm_device *dev = hdmi->dev;
+	struct hdmi_platform_config *config =
+			hdmi->pdev->dev.platform_data;
+	int ret;
+
+	if (on) {
+		ret = gpio_request(config->ddc_clk_gpio, "HDMI_DDC_CLK");
+		if (ret) {
+			dev_err(dev->dev, "'%s'(%d) gpio_request failed: %d\n",
+				"HDMI_DDC_CLK", config->ddc_clk_gpio, ret);
+			goto error1;
+		}
+		ret = gpio_request(config->ddc_data_gpio, "HDMI_DDC_DATA");
+		if (ret) {
+			dev_err(dev->dev, "'%s'(%d) gpio_request failed: %d\n",
+				"HDMI_DDC_DATA", config->ddc_data_gpio, ret);
+			goto error2;
+		}
+		ret = gpio_request(config->hpd_gpio, "HDMI_HPD");
+		if (ret) {
+			dev_err(dev->dev, "'%s'(%d) gpio_request failed: %d\n",
+				"HDMI_HPD", config->hpd_gpio, ret);
+			goto error3;
+		}
+		if (config->pmic_gpio != -1) {
+			ret = gpio_request(config->pmic_gpio, "PMIC_HDMI_MUX_SEL");
+			if (ret) {
+				dev_err(dev->dev, "'%s'(%d) gpio_request failed: %d\n",
+					"PMIC_HDMI_MUX_SEL", config->pmic_gpio, ret);
+				goto error4;
+			}
+			gpio_set_value_cansleep(config->pmic_gpio, 0);
+		}
+		DBG("gpio on");
+	} else {
+		gpio_free(config->ddc_clk_gpio);
+		gpio_free(config->ddc_data_gpio);
+		gpio_free(config->hpd_gpio);
+
+		if (config->pmic_gpio != -1) {
+			gpio_set_value_cansleep(config->pmic_gpio, 1);
+			gpio_free(config->pmic_gpio);
+		}
+		DBG("gpio off");
+	}
+
+	return 0;
+
+error4:
+	gpio_free(config->hpd_gpio);
+error3:
+	gpio_free(config->ddc_data_gpio);
+error2:
+	gpio_free(config->ddc_clk_gpio);
+error1:
+	return ret;
+}
+
+static int hpd_enable(struct hdmi_connector *hdmi_connector)
+{
+	struct hdmi *hdmi = hdmi_connector->hdmi;
+	struct drm_device *dev = hdmi_connector->base.dev;
+	struct hdmi_phy *phy = hdmi->phy;
+	uint32_t hpd_ctrl;
+	int ret;
+
+	ret = gpio_config(hdmi, true);
+	if (ret) {
+		dev_err(dev->dev, "failed to configure GPIOs: %d\n", ret);
+		goto fail;
+	}
+
+	ret = clk_prepare_enable(hdmi->clk);
+	if (ret) {
+		dev_err(dev->dev, "failed to enable 'clk': %d\n", ret);
+		goto fail;
+	}
+
+	ret = clk_prepare_enable(hdmi->m_pclk);
+	if (ret) {
+		dev_err(dev->dev, "failed to enable 'm_pclk': %d\n", ret);
+		goto fail;
+	}
+
+	ret = clk_prepare_enable(hdmi->s_pclk);
+	if (ret) {
+		dev_err(dev->dev, "failed to enable 's_pclk': %d\n", ret);
+		goto fail;
+	}
+
+	if (hdmi->mpp0)
+		ret = regulator_enable(hdmi->mpp0);
+	if (!ret)
+		ret = regulator_enable(hdmi->mvs);
+	if (ret) {
+		dev_err(dev->dev, "failed to enable regulators: %d\n", ret);
+		goto fail;
+	}
+
+	hdmi_set_mode(hdmi, false);
+	phy->funcs->reset(phy);
+	hdmi_set_mode(hdmi, true);
+
+	hdmi_write(hdmi, REG_HDMI_USEC_REFTIMER, 0x0001001b);
+
+	/* enable HPD events: */
+	hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL,
+			HDMI_HPD_INT_CTRL_INT_CONNECT |
+			HDMI_HPD_INT_CTRL_INT_EN);
+
+	/* set timeout to 4.1ms (max) for hardware debounce */
+	hpd_ctrl = hdmi_read(hdmi, REG_HDMI_HPD_CTRL);
+	hpd_ctrl |= HDMI_HPD_CTRL_TIMEOUT(0x1fff);
+
+	/* Toggle HPD circuit to trigger HPD sense */
+	hdmi_write(hdmi, REG_HDMI_HPD_CTRL,
+			~HDMI_HPD_CTRL_ENABLE & hpd_ctrl);
+	hdmi_write(hdmi, REG_HDMI_HPD_CTRL,
+			HDMI_HPD_CTRL_ENABLE | hpd_ctrl);
+
+	return 0;
+
+fail:
+	return ret;
+}
+
+static int hdp_disable(struct hdmi_connector *hdmi_connector)
+{
+	struct hdmi *hdmi = hdmi_connector->hdmi;
+	struct drm_device *dev = hdmi_connector->base.dev;
+	int ret = 0;
+
+	/* Disable HPD interrupt */
+	hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, 0);
+
+	hdmi_set_mode(hdmi, false);
+
+	if (hdmi->mpp0)
+		ret = regulator_disable(hdmi->mpp0);
+	if (!ret)
+		ret = regulator_disable(hdmi->mvs);
+	if (ret) {
+		dev_err(dev->dev, "failed to enable regulators: %d\n", ret);
+		goto fail;
+	}
+
+	clk_disable_unprepare(hdmi->clk);
+	clk_disable_unprepare(hdmi->m_pclk);
+	clk_disable_unprepare(hdmi->s_pclk);
+
+	ret = gpio_config(hdmi, false);
+	if (ret) {
+		dev_err(dev->dev, "failed to unconfigure GPIOs: %d\n", ret);
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	return ret;
+}
+
+void hdmi_connector_irq(struct drm_connector *connector)
+{
+	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+	struct hdmi *hdmi = hdmi_connector->hdmi;
+	uint32_t hpd_int_status, hpd_int_ctrl;
+
+	/* Process HPD: */
+	hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
+	hpd_int_ctrl   = hdmi_read(hdmi, REG_HDMI_HPD_INT_CTRL);
+
+	if ((hpd_int_ctrl & HDMI_HPD_INT_CTRL_INT_EN) &&
+			(hpd_int_status & HDMI_HPD_INT_STATUS_INT)) {
+		bool detected = !!(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED);
+
+		DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl);
+
+		/* ack the irq: */
+		hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL,
+				hpd_int_ctrl | HDMI_HPD_INT_CTRL_INT_ACK);
+
+		drm_helper_hpd_irq_event(connector->dev);
+
+		/* detect disconnect if we are connected or visa versa: */
+		hpd_int_ctrl = HDMI_HPD_INT_CTRL_INT_EN;
+		if (!detected)
+			hpd_int_ctrl |= HDMI_HPD_INT_CTRL_INT_CONNECT;
+		hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL, hpd_int_ctrl);
+	}
+}
+
+static enum drm_connector_status hdmi_connector_detect(
+		struct drm_connector *connector, bool force)
+{
+	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+	struct hdmi *hdmi = hdmi_connector->hdmi;
+	uint32_t hpd_int_status;
+	int retry = 20;
+
+	hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
+
+	/* sense seems to in some cases be momentarily de-asserted, don't
+	 * let that trick us into thinking the monitor is gone:
+	 */
+	while (retry-- && !(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED)) {
+		mdelay(10);
+		hpd_int_status = hdmi_read(hdmi, REG_HDMI_HPD_INT_STATUS);
+		DBG("status=%08x", hpd_int_status);
+	}
+
+	return (hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED) ?
+			connector_status_connected : connector_status_disconnected;
+}
+
+static void hdmi_connector_destroy(struct drm_connector *connector)
+{
+	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+
+	hdp_disable(hdmi_connector);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+
+	hdmi_unreference(hdmi_connector->hdmi);
+
+	kfree(hdmi_connector);
+}
+
+static int hdmi_connector_get_modes(struct drm_connector *connector)
+{
+	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+	struct hdmi *hdmi = hdmi_connector->hdmi;
+	struct edid *edid;
+	uint32_t hdmi_ctrl;
+	int ret = 0;
+
+	hdmi_ctrl = hdmi_read(hdmi, REG_HDMI_CTRL);
+	hdmi_write(hdmi, REG_HDMI_CTRL, hdmi_ctrl | HDMI_CTRL_ENABLE);
+
+	edid = drm_get_edid(connector, hdmi->i2c);
+
+	hdmi_write(hdmi, REG_HDMI_CTRL, hdmi_ctrl);
+
+	drm_mode_connector_update_edid_property(connector, edid);
+
+	if (edid) {
+		ret = drm_add_edid_modes(connector, edid);
+		kfree(edid);
+	}
+
+	return ret;
+}
+
+static int hdmi_connector_mode_valid(struct drm_connector *connector,
+				 struct drm_display_mode *mode)
+{
+	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+	struct msm_drm_private *priv = connector->dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	long actual, requested;
+
+	requested = 1000 * mode->clock;
+	actual = kms->funcs->round_pixclk(kms,
+			requested, hdmi_connector->hdmi->encoder);
+
+	DBG("requested=%ld, actual=%ld", requested, actual);
+
+	if (actual != requested)
+		return MODE_CLOCK_RANGE;
+
+	return 0;
+}
+
+static struct drm_encoder *
+hdmi_connector_best_encoder(struct drm_connector *connector)
+{
+	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
+	return hdmi_connector->hdmi->encoder;
+}
+
+static const struct drm_connector_funcs hdmi_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = hdmi_connector_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = hdmi_connector_destroy,
+};
+
+static const struct drm_connector_helper_funcs hdmi_connector_helper_funcs = {
+	.get_modes = hdmi_connector_get_modes,
+	.mode_valid = hdmi_connector_mode_valid,
+	.best_encoder = hdmi_connector_best_encoder,
+};
+
+/* initialize connector */
+struct drm_connector *hdmi_connector_init(struct hdmi *hdmi)
+{
+	struct drm_connector *connector = NULL;
+	struct hdmi_connector *hdmi_connector;
+	int ret;
+
+	hdmi_connector = kzalloc(sizeof(*hdmi_connector), GFP_KERNEL);
+	if (!hdmi_connector) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	hdmi_connector->hdmi = hdmi_reference(hdmi);
+
+	connector = &hdmi_connector->base;
+
+	drm_connector_init(hdmi->dev, connector, &hdmi_connector_funcs,
+			DRM_MODE_CONNECTOR_HDMIA);
+	drm_connector_helper_add(connector, &hdmi_connector_helper_funcs);
+
+	connector->polled = DRM_CONNECTOR_POLL_HPD;
+
+	connector->interlace_allowed = 1;
+	connector->doublescan_allowed = 0;
+
+	drm_sysfs_connector_add(connector);
+
+	ret = hpd_enable(hdmi_connector);
+	if (ret) {
+		dev_err(hdmi->dev->dev, "failed to enable HPD: %d\n", ret);
+		goto fail;
+	}
+
+	drm_mode_connector_attach_encoder(connector, hdmi->encoder);
+
+	return connector;
+
+fail:
+	if (connector)
+		hdmi_connector_destroy(connector);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c
new file mode 100644
index 0000000..f4ab7f7
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_i2c.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hdmi.h"
+
+struct hdmi_i2c_adapter {
+	struct i2c_adapter base;
+	struct hdmi *hdmi;
+	bool sw_done;
+	wait_queue_head_t ddc_event;
+};
+#define to_hdmi_i2c_adapter(x) container_of(x, struct hdmi_i2c_adapter, base)
+
+static void init_ddc(struct hdmi_i2c_adapter *hdmi_i2c)
+{
+	struct hdmi *hdmi = hdmi_i2c->hdmi;
+
+	hdmi_write(hdmi, REG_HDMI_DDC_CTRL,
+			HDMI_DDC_CTRL_SW_STATUS_RESET);
+	hdmi_write(hdmi, REG_HDMI_DDC_CTRL,
+			HDMI_DDC_CTRL_SOFT_RESET);
+
+	hdmi_write(hdmi, REG_HDMI_DDC_SPEED,
+			HDMI_DDC_SPEED_THRESHOLD(2) |
+			HDMI_DDC_SPEED_PRESCALE(10));
+
+	hdmi_write(hdmi, REG_HDMI_DDC_SETUP,
+			HDMI_DDC_SETUP_TIMEOUT(0xff));
+
+	/* enable reference timer for 27us */
+	hdmi_write(hdmi, REG_HDMI_DDC_REF,
+			HDMI_DDC_REF_REFTIMER_ENABLE |
+			HDMI_DDC_REF_REFTIMER(27));
+}
+
+static int ddc_clear_irq(struct hdmi_i2c_adapter *hdmi_i2c)
+{
+	struct hdmi *hdmi = hdmi_i2c->hdmi;
+	struct drm_device *dev = hdmi->dev;
+	uint32_t retry = 0xffff;
+	uint32_t ddc_int_ctrl;
+
+	do {
+		--retry;
+
+		hdmi_write(hdmi, REG_HDMI_DDC_INT_CTRL,
+				HDMI_DDC_INT_CTRL_SW_DONE_ACK |
+				HDMI_DDC_INT_CTRL_SW_DONE_MASK);
+
+		ddc_int_ctrl = hdmi_read(hdmi, REG_HDMI_DDC_INT_CTRL);
+
+	} while ((ddc_int_ctrl & HDMI_DDC_INT_CTRL_SW_DONE_INT) && retry);
+
+	if (!retry) {
+		dev_err(dev->dev, "timeout waiting for DDC\n");
+		return -ETIMEDOUT;
+	}
+
+	hdmi_i2c->sw_done = false;
+
+	return 0;
+}
+
+#define MAX_TRANSACTIONS 4
+
+static bool sw_done(struct hdmi_i2c_adapter *hdmi_i2c)
+{
+	struct hdmi *hdmi = hdmi_i2c->hdmi;
+
+	if (!hdmi_i2c->sw_done) {
+		uint32_t ddc_int_ctrl;
+
+		ddc_int_ctrl = hdmi_read(hdmi, REG_HDMI_DDC_INT_CTRL);
+
+		if ((ddc_int_ctrl & HDMI_DDC_INT_CTRL_SW_DONE_MASK) &&
+				(ddc_int_ctrl & HDMI_DDC_INT_CTRL_SW_DONE_INT)) {
+			hdmi_i2c->sw_done = true;
+			hdmi_write(hdmi, REG_HDMI_DDC_INT_CTRL,
+					HDMI_DDC_INT_CTRL_SW_DONE_ACK);
+		}
+	}
+
+	return hdmi_i2c->sw_done;
+}
+
+static int hdmi_i2c_xfer(struct i2c_adapter *i2c,
+		struct i2c_msg *msgs, int num)
+{
+	struct hdmi_i2c_adapter *hdmi_i2c = to_hdmi_i2c_adapter(i2c);
+	struct hdmi *hdmi = hdmi_i2c->hdmi;
+	struct drm_device *dev = hdmi->dev;
+	static const uint32_t nack[] = {
+			HDMI_DDC_SW_STATUS_NACK0, HDMI_DDC_SW_STATUS_NACK1,
+			HDMI_DDC_SW_STATUS_NACK2, HDMI_DDC_SW_STATUS_NACK3,
+	};
+	int indices[MAX_TRANSACTIONS];
+	int ret, i, j, index = 0;
+	uint32_t ddc_status, ddc_data, i2c_trans;
+
+	num = min(num, MAX_TRANSACTIONS);
+
+	WARN_ON(!(hdmi_read(hdmi, REG_HDMI_CTRL) & HDMI_CTRL_ENABLE));
+
+	if (num == 0)
+		return num;
+
+	init_ddc(hdmi_i2c);
+
+	ret = ddc_clear_irq(hdmi_i2c);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < num; i++) {
+		struct i2c_msg *p = &msgs[i];
+		uint32_t raw_addr = p->addr << 1;
+
+		if (p->flags & I2C_M_RD)
+			raw_addr |= 1;
+
+		ddc_data = HDMI_DDC_DATA_DATA(raw_addr) |
+				HDMI_DDC_DATA_DATA_RW(DDC_WRITE);
+
+		if (i == 0) {
+			ddc_data |= HDMI_DDC_DATA_INDEX(0) |
+					HDMI_DDC_DATA_INDEX_WRITE;
+		}
+
+		hdmi_write(hdmi, REG_HDMI_DDC_DATA, ddc_data);
+		index++;
+
+		indices[i] = index;
+
+		if (p->flags & I2C_M_RD) {
+			index += p->len;
+		} else {
+			for (j = 0; j < p->len; j++) {
+				ddc_data = HDMI_DDC_DATA_DATA(p->buf[j]) |
+						HDMI_DDC_DATA_DATA_RW(DDC_WRITE);
+				hdmi_write(hdmi, REG_HDMI_DDC_DATA, ddc_data);
+				index++;
+			}
+		}
+
+		i2c_trans = HDMI_I2C_TRANSACTION_REG_CNT(p->len) |
+				HDMI_I2C_TRANSACTION_REG_RW(
+						(p->flags & I2C_M_RD) ? DDC_READ : DDC_WRITE) |
+				HDMI_I2C_TRANSACTION_REG_START;
+
+		if (i == (num - 1))
+			i2c_trans |= HDMI_I2C_TRANSACTION_REG_STOP;
+
+		hdmi_write(hdmi, REG_HDMI_I2C_TRANSACTION(i), i2c_trans);
+	}
+
+	/* trigger the transfer: */
+	hdmi_write(hdmi, REG_HDMI_DDC_CTRL,
+			HDMI_DDC_CTRL_TRANSACTION_CNT(num - 1) |
+			HDMI_DDC_CTRL_GO);
+
+	ret = wait_event_timeout(hdmi_i2c->ddc_event, sw_done(hdmi_i2c), HZ/4);
+	if (ret <= 0) {
+		if (ret == 0)
+			ret = -ETIMEDOUT;
+		dev_warn(dev->dev, "DDC timeout: %d\n", ret);
+		DBG("sw_status=%08x, hw_status=%08x, int_ctrl=%08x",
+				hdmi_read(hdmi, REG_HDMI_DDC_SW_STATUS),
+				hdmi_read(hdmi, REG_HDMI_DDC_HW_STATUS),
+				hdmi_read(hdmi, REG_HDMI_DDC_INT_CTRL));
+		return ret;
+	}
+
+	ddc_status = hdmi_read(hdmi, REG_HDMI_DDC_SW_STATUS);
+
+	/* read back results of any read transactions: */
+	for (i = 0; i < num; i++) {
+		struct i2c_msg *p = &msgs[i];
+
+		if (!(p->flags & I2C_M_RD))
+			continue;
+
+		/* check for NACK: */
+		if (ddc_status & nack[i]) {
+			DBG("ddc_status=%08x", ddc_status);
+			break;
+		}
+
+		ddc_data = HDMI_DDC_DATA_DATA_RW(DDC_READ) |
+				HDMI_DDC_DATA_INDEX(indices[i]) |
+				HDMI_DDC_DATA_INDEX_WRITE;
+
+		hdmi_write(hdmi, REG_HDMI_DDC_DATA, ddc_data);
+
+		/* discard first byte: */
+		hdmi_read(hdmi, REG_HDMI_DDC_DATA);
+
+		for (j = 0; j < p->len; j++) {
+			ddc_data = hdmi_read(hdmi, REG_HDMI_DDC_DATA);
+			p->buf[j] = FIELD(ddc_data, HDMI_DDC_DATA_DATA);
+		}
+	}
+
+	return i;
+}
+
+static u32 hdmi_i2c_func(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+}
+
+static const struct i2c_algorithm hdmi_i2c_algorithm = {
+	.master_xfer	= hdmi_i2c_xfer,
+	.functionality	= hdmi_i2c_func,
+};
+
+void hdmi_i2c_irq(struct i2c_adapter *i2c)
+{
+	struct hdmi_i2c_adapter *hdmi_i2c = to_hdmi_i2c_adapter(i2c);
+
+	if (sw_done(hdmi_i2c))
+		wake_up_all(&hdmi_i2c->ddc_event);
+}
+
+void hdmi_i2c_destroy(struct i2c_adapter *i2c)
+{
+	struct hdmi_i2c_adapter *hdmi_i2c = to_hdmi_i2c_adapter(i2c);
+	i2c_del_adapter(i2c);
+	kfree(hdmi_i2c);
+}
+
+struct i2c_adapter *hdmi_i2c_init(struct hdmi *hdmi)
+{
+	struct drm_device *dev = hdmi->dev;
+	struct hdmi_i2c_adapter *hdmi_i2c;
+	struct i2c_adapter *i2c = NULL;
+	int ret;
+
+	hdmi_i2c = kzalloc(sizeof(*hdmi_i2c), GFP_KERNEL);
+	if (!hdmi_i2c) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	i2c = &hdmi_i2c->base;
+
+	hdmi_i2c->hdmi = hdmi;
+	init_waitqueue_head(&hdmi_i2c->ddc_event);
+
+
+	i2c->owner = THIS_MODULE;
+	i2c->class = I2C_CLASS_DDC;
+	snprintf(i2c->name, sizeof(i2c->name), "msm hdmi i2c");
+	i2c->dev.parent = &hdmi->pdev->dev;
+	i2c->algo = &hdmi_i2c_algorithm;
+
+	ret = i2c_add_adapter(i2c);
+	if (ret) {
+		dev_err(dev->dev, "failed to register hdmi i2c: %d\n", ret);
+		goto fail;
+	}
+
+	return i2c;
+
+fail:
+	if (i2c)
+		hdmi_i2c_destroy(i2c);
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c
new file mode 100644
index 0000000..e5b7ed5
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8960.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hdmi.h"
+
+struct hdmi_phy_8960 {
+	struct hdmi_phy base;
+	struct hdmi *hdmi;
+};
+#define to_hdmi_phy_8960(x) container_of(x, struct hdmi_phy_8960, base)
+
+static void hdmi_phy_8960_destroy(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_8960 *phy_8960 = to_hdmi_phy_8960(phy);
+	kfree(phy_8960);
+}
+
+static void hdmi_phy_8960_reset(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_8960 *phy_8960 = to_hdmi_phy_8960(phy);
+	struct hdmi *hdmi = phy_8960->hdmi;
+	unsigned int val;
+
+	val = hdmi_read(hdmi, REG_HDMI_PHY_CTRL);
+
+	if (val & HDMI_PHY_CTRL_SW_RESET_LOW) {
+		/* pull low */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val & ~HDMI_PHY_CTRL_SW_RESET);
+	} else {
+		/* pull high */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val | HDMI_PHY_CTRL_SW_RESET);
+	}
+
+	if (val & HDMI_PHY_CTRL_SW_RESET_PLL_LOW) {
+		/* pull low */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val & ~HDMI_PHY_CTRL_SW_RESET_PLL);
+	} else {
+		/* pull high */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val | HDMI_PHY_CTRL_SW_RESET_PLL);
+	}
+
+	msleep(100);
+
+	if (val & HDMI_PHY_CTRL_SW_RESET_LOW) {
+		/* pull high */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val | HDMI_PHY_CTRL_SW_RESET);
+	} else {
+		/* pull low */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val & ~HDMI_PHY_CTRL_SW_RESET);
+	}
+
+	if (val & HDMI_PHY_CTRL_SW_RESET_PLL_LOW) {
+		/* pull high */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val | HDMI_PHY_CTRL_SW_RESET_PLL);
+	} else {
+		/* pull low */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val & ~HDMI_PHY_CTRL_SW_RESET_PLL);
+	}
+}
+
+static void hdmi_phy_8960_powerup(struct hdmi_phy *phy,
+		unsigned long int pixclock)
+{
+	struct hdmi_phy_8960 *phy_8960 = to_hdmi_phy_8960(phy);
+	struct hdmi *hdmi = phy_8960->hdmi;
+
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG0, 0x1b);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG1, 0xf2);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG4, 0x00);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG5, 0x00);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG6, 0x00);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG7, 0x00);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG8, 0x00);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG9, 0x00);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG10, 0x00);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG11, 0x00);
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG3, 0x20);
+}
+
+static void hdmi_phy_8960_powerdown(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_8960 *phy_8960 = to_hdmi_phy_8960(phy);
+	struct hdmi *hdmi = phy_8960->hdmi;
+
+	hdmi_write(hdmi, REG_HDMI_8960_PHY_REG2, 0x7f);
+}
+
+static const struct hdmi_phy_funcs hdmi_phy_8960_funcs = {
+		.destroy = hdmi_phy_8960_destroy,
+		.reset = hdmi_phy_8960_reset,
+		.powerup = hdmi_phy_8960_powerup,
+		.powerdown = hdmi_phy_8960_powerdown,
+};
+
+struct hdmi_phy *hdmi_phy_8960_init(struct hdmi *hdmi)
+{
+	struct hdmi_phy_8960 *phy_8960;
+	struct hdmi_phy *phy = NULL;
+	int ret;
+
+	phy_8960 = kzalloc(sizeof(*phy_8960), GFP_KERNEL);
+	if (!phy_8960) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	phy = &phy_8960->base;
+
+	phy->funcs = &hdmi_phy_8960_funcs;
+
+	phy_8960->hdmi = hdmi;
+
+	return phy;
+
+fail:
+	if (phy)
+		hdmi_phy_8960_destroy(phy);
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c
new file mode 100644
index 0000000..391433c
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_phy_8x60.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hdmi.h"
+
+struct hdmi_phy_8x60 {
+	struct hdmi_phy base;
+	struct hdmi *hdmi;
+};
+#define to_hdmi_phy_8x60(x) container_of(x, struct hdmi_phy_8x60, base)
+
+static void hdmi_phy_8x60_destroy(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_8x60 *phy_8x60 = to_hdmi_phy_8x60(phy);
+	kfree(phy_8x60);
+}
+
+static void hdmi_phy_8x60_reset(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_8x60 *phy_8x60 = to_hdmi_phy_8x60(phy);
+	struct hdmi *hdmi = phy_8x60->hdmi;
+	unsigned int val;
+
+	val = hdmi_read(hdmi, REG_HDMI_PHY_CTRL);
+
+	if (val & HDMI_PHY_CTRL_SW_RESET_LOW) {
+		/* pull low */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val & ~HDMI_PHY_CTRL_SW_RESET);
+	} else {
+		/* pull high */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val | HDMI_PHY_CTRL_SW_RESET);
+	}
+
+	msleep(100);
+
+	if (val & HDMI_PHY_CTRL_SW_RESET_LOW) {
+		/* pull high */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val | HDMI_PHY_CTRL_SW_RESET);
+	} else {
+		/* pull low */
+		hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+				val & ~HDMI_PHY_CTRL_SW_RESET);
+	}
+}
+
+static void hdmi_phy_8x60_powerup(struct hdmi_phy *phy,
+		unsigned long int pixclock)
+{
+	struct hdmi_phy_8x60 *phy_8x60 = to_hdmi_phy_8x60(phy);
+	struct hdmi *hdmi = phy_8x60->hdmi;
+
+	/* De-serializer delay D/C for non-lbk mode: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG0,
+			HDMI_8x60_PHY_REG0_DESER_DEL_CTRL(3));
+
+	if (pixclock == 27000000) {
+		/* video_format == HDMI_VFRMT_720x480p60_16_9 */
+		hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG1,
+				HDMI_8x60_PHY_REG1_DTEST_MUX_SEL(5) |
+				HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(3));
+	} else {
+		hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG1,
+				HDMI_8x60_PHY_REG1_DTEST_MUX_SEL(5) |
+				HDMI_8x60_PHY_REG1_OUTVOL_SWING_CTRL(4));
+	}
+
+	/* No matter what, start from the power down mode: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
+			HDMI_8x60_PHY_REG2_PD_PWRGEN |
+			HDMI_8x60_PHY_REG2_PD_PLL |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+			HDMI_8x60_PHY_REG2_PD_DESER);
+
+	/* Turn PowerGen on: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
+			HDMI_8x60_PHY_REG2_PD_PLL |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+			HDMI_8x60_PHY_REG2_PD_DESER);
+
+	/* Turn PLL power on: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
+			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+			HDMI_8x60_PHY_REG2_PD_DESER);
+
+	/* Write to HIGH after PLL power down de-assert: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG3,
+			HDMI_8x60_PHY_REG3_PLL_ENABLE);
+
+	/* ASIC power on; PHY REG9 = 0 */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG9, 0);
+
+	/* Enable PLL lock detect, PLL lock det will go high after lock
+	 * Enable the re-time logic
+	 */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG12,
+			HDMI_8x60_PHY_REG12_RETIMING_EN |
+			HDMI_8x60_PHY_REG12_PLL_LOCK_DETECT_EN);
+
+	/* Drivers are on: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
+			HDMI_8x60_PHY_REG2_PD_DESER);
+
+	/* If the RX detector is needed: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
+			HDMI_8x60_PHY_REG2_RCV_SENSE_EN |
+			HDMI_8x60_PHY_REG2_PD_DESER);
+
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG4, 0);
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG5, 0);
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG6, 0);
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG7, 0);
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG8, 0);
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG9, 0);
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG10, 0);
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG11, 0);
+
+	/* If we want to use lock enable based on counting: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG12,
+			HDMI_8x60_PHY_REG12_RETIMING_EN |
+			HDMI_8x60_PHY_REG12_PLL_LOCK_DETECT_EN |
+			HDMI_8x60_PHY_REG12_FORCE_LOCK);
+}
+
+static void hdmi_phy_8x60_powerdown(struct hdmi_phy *phy)
+{
+	struct hdmi_phy_8x60 *phy_8x60 = to_hdmi_phy_8x60(phy);
+	struct hdmi *hdmi = phy_8x60->hdmi;
+
+	/* Assert RESET PHY from controller */
+	hdmi_write(hdmi, REG_HDMI_PHY_CTRL,
+			HDMI_PHY_CTRL_SW_RESET);
+	udelay(10);
+	/* De-assert RESET PHY from controller */
+	hdmi_write(hdmi, REG_HDMI_PHY_CTRL, 0);
+	/* Turn off Driver */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
+			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+			HDMI_8x60_PHY_REG2_PD_DESER);
+	udelay(10);
+	/* Disable PLL */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG3, 0);
+	/* Power down PHY, but keep RX-sense: */
+	hdmi_write(hdmi, REG_HDMI_8x60_PHY_REG2,
+			HDMI_8x60_PHY_REG2_RCV_SENSE_EN |
+			HDMI_8x60_PHY_REG2_PD_PWRGEN |
+			HDMI_8x60_PHY_REG2_PD_PLL |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_4 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_3 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_2 |
+			HDMI_8x60_PHY_REG2_PD_DRIVE_1 |
+			HDMI_8x60_PHY_REG2_PD_DESER);
+}
+
+static const struct hdmi_phy_funcs hdmi_phy_8x60_funcs = {
+		.destroy = hdmi_phy_8x60_destroy,
+		.reset = hdmi_phy_8x60_reset,
+		.powerup = hdmi_phy_8x60_powerup,
+		.powerdown = hdmi_phy_8x60_powerdown,
+};
+
+struct hdmi_phy *hdmi_phy_8x60_init(struct hdmi *hdmi)
+{
+	struct hdmi_phy_8x60 *phy_8x60;
+	struct hdmi_phy *phy = NULL;
+	int ret;
+
+	phy_8x60 = kzalloc(sizeof(*phy_8x60), GFP_KERNEL);
+	if (!phy_8x60) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	phy = &phy_8x60->base;
+
+	phy->funcs = &hdmi_phy_8x60_funcs;
+
+	phy_8x60->hdmi = hdmi;
+
+	return phy;
+
+fail:
+	if (phy)
+		hdmi_phy_8x60_destroy(phy);
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h
new file mode 100644
index 0000000..bee3636
--- /dev/null
+++ b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h
@@ -0,0 +1,50 @@
+#ifndef QFPROM_XML
+#define QFPROM_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    595 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp4/mdp4.xml           (  19332 bytes, from 2013-08-16 22:16:36)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  19288 bytes, from 2013-08-11 18:14:15)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define REG_QFPROM_CONFIG_ROW0_LSB				0x00000238
+#define QFPROM_CONFIG_ROW0_LSB_HDMI_DISABLE			0x00200000
+#define QFPROM_CONFIG_ROW0_LSB_HDCP_DISABLE			0x00400000
+
+
+#endif /* QFPROM_XML */
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4.xml.h b/drivers/gpu/drm/msm/mdp4/mdp4.xml.h
new file mode 100644
index 0000000..bbeeebe
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp4/mdp4.xml.h
@@ -0,0 +1,1061 @@
+#ifndef MDP4_XML
+#define MDP4_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    595 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp4/mdp4.xml           (  19332 bytes, from 2013-08-16 22:16:36)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  19288 bytes, from 2013-08-11 18:14:15)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum mpd4_bpc {
+	BPC1 = 0,
+	BPC5 = 1,
+	BPC6 = 2,
+	BPC8 = 3,
+};
+
+enum mpd4_bpc_alpha {
+	BPC1A = 0,
+	BPC4A = 1,
+	BPC6A = 2,
+	BPC8A = 3,
+};
+
+enum mpd4_alpha_type {
+	FG_CONST = 0,
+	BG_CONST = 1,
+	FG_PIXEL = 2,
+	BG_PIXEL = 3,
+};
+
+enum mpd4_pipe {
+	VG1 = 0,
+	VG2 = 1,
+	RGB1 = 2,
+	RGB2 = 3,
+	RGB3 = 4,
+	VG3 = 5,
+	VG4 = 6,
+};
+
+enum mpd4_mixer {
+	MIXER0 = 0,
+	MIXER1 = 1,
+	MIXER2 = 2,
+};
+
+enum mpd4_mixer_stage_id {
+	STAGE_UNUSED = 0,
+	STAGE_BASE = 1,
+	STAGE0 = 2,
+	STAGE1 = 3,
+	STAGE2 = 4,
+	STAGE3 = 5,
+};
+
+enum mdp4_intf {
+	INTF_LCDC_DTV = 0,
+	INTF_DSI_VIDEO = 1,
+	INTF_DSI_CMD = 2,
+	INTF_EBI2_TV = 3,
+};
+
+enum mdp4_cursor_format {
+	CURSOR_ARGB = 1,
+	CURSOR_XRGB = 2,
+};
+
+enum mdp4_dma {
+	DMA_P = 0,
+	DMA_S = 1,
+	DMA_E = 2,
+};
+
+#define MDP4_IRQ_OVERLAY0_DONE					0x00000001
+#define MDP4_IRQ_OVERLAY1_DONE					0x00000002
+#define MDP4_IRQ_DMA_S_DONE					0x00000004
+#define MDP4_IRQ_DMA_E_DONE					0x00000008
+#define MDP4_IRQ_DMA_P_DONE					0x00000010
+#define MDP4_IRQ_VG1_HISTOGRAM					0x00000020
+#define MDP4_IRQ_VG2_HISTOGRAM					0x00000040
+#define MDP4_IRQ_PRIMARY_VSYNC					0x00000080
+#define MDP4_IRQ_PRIMARY_INTF_UDERRUN				0x00000100
+#define MDP4_IRQ_EXTERNAL_VSYNC					0x00000200
+#define MDP4_IRQ_EXTERNAL_INTF_UDERRUN				0x00000400
+#define MDP4_IRQ_PRIMARY_RDPTR					0x00000800
+#define MDP4_IRQ_DMA_P_HISTOGRAM				0x00020000
+#define MDP4_IRQ_DMA_S_HISTOGRAM				0x04000000
+#define MDP4_IRQ_OVERLAY2_DONE					0x40000000
+#define REG_MDP4_VERSION					0x00000000
+#define MDP4_VERSION_MINOR__MASK				0x00ff0000
+#define MDP4_VERSION_MINOR__SHIFT				16
+static inline uint32_t MDP4_VERSION_MINOR(uint32_t val)
+{
+	return ((val) << MDP4_VERSION_MINOR__SHIFT) & MDP4_VERSION_MINOR__MASK;
+}
+#define MDP4_VERSION_MAJOR__MASK				0xff000000
+#define MDP4_VERSION_MAJOR__SHIFT				24
+static inline uint32_t MDP4_VERSION_MAJOR(uint32_t val)
+{
+	return ((val) << MDP4_VERSION_MAJOR__SHIFT) & MDP4_VERSION_MAJOR__MASK;
+}
+
+#define REG_MDP4_OVLP0_KICK					0x00000004
+
+#define REG_MDP4_OVLP1_KICK					0x00000008
+
+#define REG_MDP4_OVLP2_KICK					0x000000d0
+
+#define REG_MDP4_DMA_P_KICK					0x0000000c
+
+#define REG_MDP4_DMA_S_KICK					0x00000010
+
+#define REG_MDP4_DMA_E_KICK					0x00000014
+
+#define REG_MDP4_DISP_STATUS					0x00000018
+
+#define REG_MDP4_DISP_INTF_SEL					0x00000038
+#define MDP4_DISP_INTF_SEL_PRIM__MASK				0x00000003
+#define MDP4_DISP_INTF_SEL_PRIM__SHIFT				0
+static inline uint32_t MDP4_DISP_INTF_SEL_PRIM(enum mdp4_intf val)
+{
+	return ((val) << MDP4_DISP_INTF_SEL_PRIM__SHIFT) & MDP4_DISP_INTF_SEL_PRIM__MASK;
+}
+#define MDP4_DISP_INTF_SEL_SEC__MASK				0x0000000c
+#define MDP4_DISP_INTF_SEL_SEC__SHIFT				2
+static inline uint32_t MDP4_DISP_INTF_SEL_SEC(enum mdp4_intf val)
+{
+	return ((val) << MDP4_DISP_INTF_SEL_SEC__SHIFT) & MDP4_DISP_INTF_SEL_SEC__MASK;
+}
+#define MDP4_DISP_INTF_SEL_EXT__MASK				0x00000030
+#define MDP4_DISP_INTF_SEL_EXT__SHIFT				4
+static inline uint32_t MDP4_DISP_INTF_SEL_EXT(enum mdp4_intf val)
+{
+	return ((val) << MDP4_DISP_INTF_SEL_EXT__SHIFT) & MDP4_DISP_INTF_SEL_EXT__MASK;
+}
+#define MDP4_DISP_INTF_SEL_DSI_VIDEO				0x00000040
+#define MDP4_DISP_INTF_SEL_DSI_CMD				0x00000080
+
+#define REG_MDP4_RESET_STATUS					0x0000003c
+
+#define REG_MDP4_READ_CNFG					0x0000004c
+
+#define REG_MDP4_INTR_ENABLE					0x00000050
+
+#define REG_MDP4_INTR_STATUS					0x00000054
+
+#define REG_MDP4_INTR_CLEAR					0x00000058
+
+#define REG_MDP4_EBI2_LCD0					0x00000060
+
+#define REG_MDP4_EBI2_LCD1					0x00000064
+
+#define REG_MDP4_PORTMAP_MODE					0x00000070
+
+#define REG_MDP4_CS_CONTROLLER0					0x000000c0
+
+#define REG_MDP4_CS_CONTROLLER1					0x000000c4
+
+#define REG_MDP4_LAYERMIXER2_IN_CFG				0x000100f0
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE0__MASK			0x00000007
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE0__SHIFT			0
+static inline uint32_t MDP4_LAYERMIXER2_IN_CFG_PIPE0(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER2_IN_CFG_PIPE0__SHIFT) & MDP4_LAYERMIXER2_IN_CFG_PIPE0__MASK;
+}
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE0_MIXER1			0x00000008
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE1__MASK			0x00000070
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE1__SHIFT			4
+static inline uint32_t MDP4_LAYERMIXER2_IN_CFG_PIPE1(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER2_IN_CFG_PIPE1__SHIFT) & MDP4_LAYERMIXER2_IN_CFG_PIPE1__MASK;
+}
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE1_MIXER1			0x00000080
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE2__MASK			0x00000700
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE2__SHIFT			8
+static inline uint32_t MDP4_LAYERMIXER2_IN_CFG_PIPE2(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER2_IN_CFG_PIPE2__SHIFT) & MDP4_LAYERMIXER2_IN_CFG_PIPE2__MASK;
+}
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE2_MIXER1			0x00000800
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE3__MASK			0x00007000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE3__SHIFT			12
+static inline uint32_t MDP4_LAYERMIXER2_IN_CFG_PIPE3(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER2_IN_CFG_PIPE3__SHIFT) & MDP4_LAYERMIXER2_IN_CFG_PIPE3__MASK;
+}
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE3_MIXER1			0x00008000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE4__MASK			0x00070000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE4__SHIFT			16
+static inline uint32_t MDP4_LAYERMIXER2_IN_CFG_PIPE4(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER2_IN_CFG_PIPE4__SHIFT) & MDP4_LAYERMIXER2_IN_CFG_PIPE4__MASK;
+}
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE4_MIXER1			0x00080000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE5__MASK			0x00700000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE5__SHIFT			20
+static inline uint32_t MDP4_LAYERMIXER2_IN_CFG_PIPE5(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER2_IN_CFG_PIPE5__SHIFT) & MDP4_LAYERMIXER2_IN_CFG_PIPE5__MASK;
+}
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE5_MIXER1			0x00800000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE6__MASK			0x07000000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE6__SHIFT			24
+static inline uint32_t MDP4_LAYERMIXER2_IN_CFG_PIPE6(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER2_IN_CFG_PIPE6__SHIFT) & MDP4_LAYERMIXER2_IN_CFG_PIPE6__MASK;
+}
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE6_MIXER1			0x08000000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE7__MASK			0x70000000
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE7__SHIFT			28
+static inline uint32_t MDP4_LAYERMIXER2_IN_CFG_PIPE7(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER2_IN_CFG_PIPE7__SHIFT) & MDP4_LAYERMIXER2_IN_CFG_PIPE7__MASK;
+}
+#define MDP4_LAYERMIXER2_IN_CFG_PIPE7_MIXER1			0x80000000
+
+#define REG_MDP4_LAYERMIXER_IN_CFG_UPDATE_METHOD		0x000100fc
+
+#define REG_MDP4_LAYERMIXER_IN_CFG				0x00010100
+#define MDP4_LAYERMIXER_IN_CFG_PIPE0__MASK			0x00000007
+#define MDP4_LAYERMIXER_IN_CFG_PIPE0__SHIFT			0
+static inline uint32_t MDP4_LAYERMIXER_IN_CFG_PIPE0(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER_IN_CFG_PIPE0__SHIFT) & MDP4_LAYERMIXER_IN_CFG_PIPE0__MASK;
+}
+#define MDP4_LAYERMIXER_IN_CFG_PIPE0_MIXER1			0x00000008
+#define MDP4_LAYERMIXER_IN_CFG_PIPE1__MASK			0x00000070
+#define MDP4_LAYERMIXER_IN_CFG_PIPE1__SHIFT			4
+static inline uint32_t MDP4_LAYERMIXER_IN_CFG_PIPE1(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER_IN_CFG_PIPE1__SHIFT) & MDP4_LAYERMIXER_IN_CFG_PIPE1__MASK;
+}
+#define MDP4_LAYERMIXER_IN_CFG_PIPE1_MIXER1			0x00000080
+#define MDP4_LAYERMIXER_IN_CFG_PIPE2__MASK			0x00000700
+#define MDP4_LAYERMIXER_IN_CFG_PIPE2__SHIFT			8
+static inline uint32_t MDP4_LAYERMIXER_IN_CFG_PIPE2(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER_IN_CFG_PIPE2__SHIFT) & MDP4_LAYERMIXER_IN_CFG_PIPE2__MASK;
+}
+#define MDP4_LAYERMIXER_IN_CFG_PIPE2_MIXER1			0x00000800
+#define MDP4_LAYERMIXER_IN_CFG_PIPE3__MASK			0x00007000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE3__SHIFT			12
+static inline uint32_t MDP4_LAYERMIXER_IN_CFG_PIPE3(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER_IN_CFG_PIPE3__SHIFT) & MDP4_LAYERMIXER_IN_CFG_PIPE3__MASK;
+}
+#define MDP4_LAYERMIXER_IN_CFG_PIPE3_MIXER1			0x00008000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE4__MASK			0x00070000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE4__SHIFT			16
+static inline uint32_t MDP4_LAYERMIXER_IN_CFG_PIPE4(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER_IN_CFG_PIPE4__SHIFT) & MDP4_LAYERMIXER_IN_CFG_PIPE4__MASK;
+}
+#define MDP4_LAYERMIXER_IN_CFG_PIPE4_MIXER1			0x00080000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE5__MASK			0x00700000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE5__SHIFT			20
+static inline uint32_t MDP4_LAYERMIXER_IN_CFG_PIPE5(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER_IN_CFG_PIPE5__SHIFT) & MDP4_LAYERMIXER_IN_CFG_PIPE5__MASK;
+}
+#define MDP4_LAYERMIXER_IN_CFG_PIPE5_MIXER1			0x00800000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE6__MASK			0x07000000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE6__SHIFT			24
+static inline uint32_t MDP4_LAYERMIXER_IN_CFG_PIPE6(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER_IN_CFG_PIPE6__SHIFT) & MDP4_LAYERMIXER_IN_CFG_PIPE6__MASK;
+}
+#define MDP4_LAYERMIXER_IN_CFG_PIPE6_MIXER1			0x08000000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE7__MASK			0x70000000
+#define MDP4_LAYERMIXER_IN_CFG_PIPE7__SHIFT			28
+static inline uint32_t MDP4_LAYERMIXER_IN_CFG_PIPE7(enum mpd4_mixer_stage_id val)
+{
+	return ((val) << MDP4_LAYERMIXER_IN_CFG_PIPE7__SHIFT) & MDP4_LAYERMIXER_IN_CFG_PIPE7__MASK;
+}
+#define MDP4_LAYERMIXER_IN_CFG_PIPE7_MIXER1			0x80000000
+
+#define REG_MDP4_VG2_SRC_FORMAT					0x00030050
+
+#define REG_MDP4_VG2_CONST_COLOR				0x00031008
+
+#define REG_MDP4_OVERLAY_FLUSH					0x00018000
+#define MDP4_OVERLAY_FLUSH_OVLP0				0x00000001
+#define MDP4_OVERLAY_FLUSH_OVLP1				0x00000002
+#define MDP4_OVERLAY_FLUSH_VG1					0x00000004
+#define MDP4_OVERLAY_FLUSH_VG2					0x00000008
+#define MDP4_OVERLAY_FLUSH_RGB1					0x00000010
+#define MDP4_OVERLAY_FLUSH_RGB2					0x00000020
+
+static inline uint32_t __offset_OVLP(uint32_t idx)
+{
+	switch (idx) {
+		case 0: return 0x00010000;
+		case 1: return 0x00018000;
+		case 2: return 0x00088000;
+		default: return INVALID_IDX(idx);
+	}
+}
+static inline uint32_t REG_MDP4_OVLP(uint32_t i0) { return 0x00000000 + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_CFG(uint32_t i0) { return 0x00000004 + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_SIZE(uint32_t i0) { return 0x00000008 + __offset_OVLP(i0); }
+#define MDP4_OVLP_SIZE_HEIGHT__MASK				0xffff0000
+#define MDP4_OVLP_SIZE_HEIGHT__SHIFT				16
+static inline uint32_t MDP4_OVLP_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val) << MDP4_OVLP_SIZE_HEIGHT__SHIFT) & MDP4_OVLP_SIZE_HEIGHT__MASK;
+}
+#define MDP4_OVLP_SIZE_WIDTH__MASK				0x0000ffff
+#define MDP4_OVLP_SIZE_WIDTH__SHIFT				0
+static inline uint32_t MDP4_OVLP_SIZE_WIDTH(uint32_t val)
+{
+	return ((val) << MDP4_OVLP_SIZE_WIDTH__SHIFT) & MDP4_OVLP_SIZE_WIDTH__MASK;
+}
+
+static inline uint32_t REG_MDP4_OVLP_BASE(uint32_t i0) { return 0x0000000c + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_STRIDE(uint32_t i0) { return 0x00000010 + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_OPMODE(uint32_t i0) { return 0x00000014 + __offset_OVLP(i0); }
+
+static inline uint32_t __offset_STAGE(uint32_t idx)
+{
+	switch (idx) {
+		case 0: return 0x00000104;
+		case 1: return 0x00000124;
+		case 2: return 0x00000144;
+		case 3: return 0x00000160;
+		default: return INVALID_IDX(idx);
+	}
+}
+static inline uint32_t REG_MDP4_OVLP_STAGE(uint32_t i0, uint32_t i1) { return 0x00000000 + __offset_OVLP(i0) + __offset_STAGE(i1); }
+
+static inline uint32_t REG_MDP4_OVLP_STAGE_OP(uint32_t i0, uint32_t i1) { return 0x00000000 + __offset_OVLP(i0) + __offset_STAGE(i1); }
+#define MDP4_OVLP_STAGE_OP_FG_ALPHA__MASK			0x00000003
+#define MDP4_OVLP_STAGE_OP_FG_ALPHA__SHIFT			0
+static inline uint32_t MDP4_OVLP_STAGE_OP_FG_ALPHA(enum mpd4_alpha_type val)
+{
+	return ((val) << MDP4_OVLP_STAGE_OP_FG_ALPHA__SHIFT) & MDP4_OVLP_STAGE_OP_FG_ALPHA__MASK;
+}
+#define MDP4_OVLP_STAGE_OP_FG_INV_ALPHA				0x00000004
+#define MDP4_OVLP_STAGE_OP_FG_MOD_ALPHA				0x00000008
+#define MDP4_OVLP_STAGE_OP_BG_ALPHA__MASK			0x00000030
+#define MDP4_OVLP_STAGE_OP_BG_ALPHA__SHIFT			4
+static inline uint32_t MDP4_OVLP_STAGE_OP_BG_ALPHA(enum mpd4_alpha_type val)
+{
+	return ((val) << MDP4_OVLP_STAGE_OP_BG_ALPHA__SHIFT) & MDP4_OVLP_STAGE_OP_BG_ALPHA__MASK;
+}
+#define MDP4_OVLP_STAGE_OP_BG_INV_ALPHA				0x00000040
+#define MDP4_OVLP_STAGE_OP_BG_MOD_ALPHA				0x00000080
+#define MDP4_OVLP_STAGE_OP_FG_TRANSP				0x00000100
+#define MDP4_OVLP_STAGE_OP_BG_TRANSP				0x00000200
+
+static inline uint32_t REG_MDP4_OVLP_STAGE_FG_ALPHA(uint32_t i0, uint32_t i1) { return 0x00000004 + __offset_OVLP(i0) + __offset_STAGE(i1); }
+
+static inline uint32_t REG_MDP4_OVLP_STAGE_BG_ALPHA(uint32_t i0, uint32_t i1) { return 0x00000008 + __offset_OVLP(i0) + __offset_STAGE(i1); }
+
+static inline uint32_t REG_MDP4_OVLP_STAGE_TRANSP_LOW0(uint32_t i0, uint32_t i1) { return 0x0000000c + __offset_OVLP(i0) + __offset_STAGE(i1); }
+
+static inline uint32_t REG_MDP4_OVLP_STAGE_TRANSP_LOW1(uint32_t i0, uint32_t i1) { return 0x00000010 + __offset_OVLP(i0) + __offset_STAGE(i1); }
+
+static inline uint32_t REG_MDP4_OVLP_STAGE_TRANSP_HIGH0(uint32_t i0, uint32_t i1) { return 0x00000014 + __offset_OVLP(i0) + __offset_STAGE(i1); }
+
+static inline uint32_t REG_MDP4_OVLP_STAGE_TRANSP_HIGH1(uint32_t i0, uint32_t i1) { return 0x00000018 + __offset_OVLP(i0) + __offset_STAGE(i1); }
+
+static inline uint32_t __offset_STAGE_CO3(uint32_t idx)
+{
+	switch (idx) {
+		case 0: return 0x00001004;
+		case 1: return 0x00001404;
+		case 2: return 0x00001804;
+		case 3: return 0x00001b84;
+		default: return INVALID_IDX(idx);
+	}
+}
+static inline uint32_t REG_MDP4_OVLP_STAGE_CO3(uint32_t i0, uint32_t i1) { return 0x00000000 + __offset_OVLP(i0) + __offset_STAGE_CO3(i1); }
+
+static inline uint32_t REG_MDP4_OVLP_STAGE_CO3_SEL(uint32_t i0, uint32_t i1) { return 0x00000000 + __offset_OVLP(i0) + __offset_STAGE_CO3(i1); }
+#define MDP4_OVLP_STAGE_CO3_SEL_FG_ALPHA			0x00000001
+
+static inline uint32_t REG_MDP4_OVLP_TRANSP_LOW0(uint32_t i0) { return 0x00000180 + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_TRANSP_LOW1(uint32_t i0) { return 0x00000184 + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_TRANSP_HIGH0(uint32_t i0) { return 0x00000188 + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_TRANSP_HIGH1(uint32_t i0) { return 0x0000018c + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_CONFIG(uint32_t i0) { return 0x00000200 + __offset_OVLP(i0); }
+
+static inline uint32_t REG_MDP4_OVLP_CSC(uint32_t i0) { return 0x00002000 + __offset_OVLP(i0); }
+
+
+static inline uint32_t REG_MDP4_OVLP_CSC_MV(uint32_t i0, uint32_t i1) { return 0x00002400 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_MV_VAL(uint32_t i0, uint32_t i1) { return 0x00002400 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_PRE_BV(uint32_t i0, uint32_t i1) { return 0x00002500 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_PRE_BV_VAL(uint32_t i0, uint32_t i1) { return 0x00002500 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_POST_BV(uint32_t i0, uint32_t i1) { return 0x00002580 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_POST_BV_VAL(uint32_t i0, uint32_t i1) { return 0x00002580 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_PRE_LV(uint32_t i0, uint32_t i1) { return 0x00002600 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_PRE_LV_VAL(uint32_t i0, uint32_t i1) { return 0x00002600 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_POST_LV(uint32_t i0, uint32_t i1) { return 0x00002680 + __offset_OVLP(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_OVLP_CSC_POST_LV_VAL(uint32_t i0, uint32_t i1) { return 0x00002680 + __offset_OVLP(i0) + 0x4*i1; }
+
+#define REG_MDP4_DMA_P_OP_MODE					0x00090070
+
+static inline uint32_t REG_MDP4_LUTN(uint32_t i0) { return 0x00094800 + 0x400*i0; }
+
+static inline uint32_t REG_MDP4_LUTN_LUT(uint32_t i0, uint32_t i1) { return 0x00094800 + 0x400*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_LUTN_LUT_VAL(uint32_t i0, uint32_t i1) { return 0x00094800 + 0x400*i0 + 0x4*i1; }
+
+#define REG_MDP4_DMA_S_OP_MODE					0x000a0028
+
+static inline uint32_t REG_MDP4_DMA_E_QUANT(uint32_t i0) { return 0x000b0070 + 0x4*i0; }
+
+static inline uint32_t __offset_DMA(enum mdp4_dma idx)
+{
+	switch (idx) {
+		case DMA_P: return 0x00090000;
+		case DMA_S: return 0x000a0000;
+		case DMA_E: return 0x000b0000;
+		default: return INVALID_IDX(idx);
+	}
+}
+static inline uint32_t REG_MDP4_DMA(enum mdp4_dma i0) { return 0x00000000 + __offset_DMA(i0); }
+
+static inline uint32_t REG_MDP4_DMA_CONFIG(enum mdp4_dma i0) { return 0x00000000 + __offset_DMA(i0); }
+#define MDP4_DMA_CONFIG_G_BPC__MASK				0x00000003
+#define MDP4_DMA_CONFIG_G_BPC__SHIFT				0
+static inline uint32_t MDP4_DMA_CONFIG_G_BPC(enum mpd4_bpc val)
+{
+	return ((val) << MDP4_DMA_CONFIG_G_BPC__SHIFT) & MDP4_DMA_CONFIG_G_BPC__MASK;
+}
+#define MDP4_DMA_CONFIG_B_BPC__MASK				0x0000000c
+#define MDP4_DMA_CONFIG_B_BPC__SHIFT				2
+static inline uint32_t MDP4_DMA_CONFIG_B_BPC(enum mpd4_bpc val)
+{
+	return ((val) << MDP4_DMA_CONFIG_B_BPC__SHIFT) & MDP4_DMA_CONFIG_B_BPC__MASK;
+}
+#define MDP4_DMA_CONFIG_R_BPC__MASK				0x00000030
+#define MDP4_DMA_CONFIG_R_BPC__SHIFT				4
+static inline uint32_t MDP4_DMA_CONFIG_R_BPC(enum mpd4_bpc val)
+{
+	return ((val) << MDP4_DMA_CONFIG_R_BPC__SHIFT) & MDP4_DMA_CONFIG_R_BPC__MASK;
+}
+#define MDP4_DMA_CONFIG_PACK_ALIGN_MSB				0x00000080
+#define MDP4_DMA_CONFIG_PACK__MASK				0x0000ff00
+#define MDP4_DMA_CONFIG_PACK__SHIFT				8
+static inline uint32_t MDP4_DMA_CONFIG_PACK(uint32_t val)
+{
+	return ((val) << MDP4_DMA_CONFIG_PACK__SHIFT) & MDP4_DMA_CONFIG_PACK__MASK;
+}
+#define MDP4_DMA_CONFIG_DEFLKR_EN				0x01000000
+#define MDP4_DMA_CONFIG_DITHER_EN				0x01000000
+
+static inline uint32_t REG_MDP4_DMA_SRC_SIZE(enum mdp4_dma i0) { return 0x00000004 + __offset_DMA(i0); }
+#define MDP4_DMA_SRC_SIZE_HEIGHT__MASK				0xffff0000
+#define MDP4_DMA_SRC_SIZE_HEIGHT__SHIFT				16
+static inline uint32_t MDP4_DMA_SRC_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val) << MDP4_DMA_SRC_SIZE_HEIGHT__SHIFT) & MDP4_DMA_SRC_SIZE_HEIGHT__MASK;
+}
+#define MDP4_DMA_SRC_SIZE_WIDTH__MASK				0x0000ffff
+#define MDP4_DMA_SRC_SIZE_WIDTH__SHIFT				0
+static inline uint32_t MDP4_DMA_SRC_SIZE_WIDTH(uint32_t val)
+{
+	return ((val) << MDP4_DMA_SRC_SIZE_WIDTH__SHIFT) & MDP4_DMA_SRC_SIZE_WIDTH__MASK;
+}
+
+static inline uint32_t REG_MDP4_DMA_SRC_BASE(enum mdp4_dma i0) { return 0x00000008 + __offset_DMA(i0); }
+
+static inline uint32_t REG_MDP4_DMA_SRC_STRIDE(enum mdp4_dma i0) { return 0x0000000c + __offset_DMA(i0); }
+
+static inline uint32_t REG_MDP4_DMA_DST_SIZE(enum mdp4_dma i0) { return 0x00000010 + __offset_DMA(i0); }
+#define MDP4_DMA_DST_SIZE_HEIGHT__MASK				0xffff0000
+#define MDP4_DMA_DST_SIZE_HEIGHT__SHIFT				16
+static inline uint32_t MDP4_DMA_DST_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val) << MDP4_DMA_DST_SIZE_HEIGHT__SHIFT) & MDP4_DMA_DST_SIZE_HEIGHT__MASK;
+}
+#define MDP4_DMA_DST_SIZE_WIDTH__MASK				0x0000ffff
+#define MDP4_DMA_DST_SIZE_WIDTH__SHIFT				0
+static inline uint32_t MDP4_DMA_DST_SIZE_WIDTH(uint32_t val)
+{
+	return ((val) << MDP4_DMA_DST_SIZE_WIDTH__SHIFT) & MDP4_DMA_DST_SIZE_WIDTH__MASK;
+}
+
+static inline uint32_t REG_MDP4_DMA_CURSOR_SIZE(enum mdp4_dma i0) { return 0x00000044 + __offset_DMA(i0); }
+#define MDP4_DMA_CURSOR_SIZE_WIDTH__MASK			0x0000007f
+#define MDP4_DMA_CURSOR_SIZE_WIDTH__SHIFT			0
+static inline uint32_t MDP4_DMA_CURSOR_SIZE_WIDTH(uint32_t val)
+{
+	return ((val) << MDP4_DMA_CURSOR_SIZE_WIDTH__SHIFT) & MDP4_DMA_CURSOR_SIZE_WIDTH__MASK;
+}
+#define MDP4_DMA_CURSOR_SIZE_HEIGHT__MASK			0x007f0000
+#define MDP4_DMA_CURSOR_SIZE_HEIGHT__SHIFT			16
+static inline uint32_t MDP4_DMA_CURSOR_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val) << MDP4_DMA_CURSOR_SIZE_HEIGHT__SHIFT) & MDP4_DMA_CURSOR_SIZE_HEIGHT__MASK;
+}
+
+static inline uint32_t REG_MDP4_DMA_CURSOR_BASE(enum mdp4_dma i0) { return 0x00000048 + __offset_DMA(i0); }
+
+static inline uint32_t REG_MDP4_DMA_CURSOR_POS(enum mdp4_dma i0) { return 0x0000004c + __offset_DMA(i0); }
+#define MDP4_DMA_CURSOR_POS_X__MASK				0x0000ffff
+#define MDP4_DMA_CURSOR_POS_X__SHIFT				0
+static inline uint32_t MDP4_DMA_CURSOR_POS_X(uint32_t val)
+{
+	return ((val) << MDP4_DMA_CURSOR_POS_X__SHIFT) & MDP4_DMA_CURSOR_POS_X__MASK;
+}
+#define MDP4_DMA_CURSOR_POS_Y__MASK				0xffff0000
+#define MDP4_DMA_CURSOR_POS_Y__SHIFT				16
+static inline uint32_t MDP4_DMA_CURSOR_POS_Y(uint32_t val)
+{
+	return ((val) << MDP4_DMA_CURSOR_POS_Y__SHIFT) & MDP4_DMA_CURSOR_POS_Y__MASK;
+}
+
+static inline uint32_t REG_MDP4_DMA_CURSOR_BLEND_CONFIG(enum mdp4_dma i0) { return 0x00000060 + __offset_DMA(i0); }
+#define MDP4_DMA_CURSOR_BLEND_CONFIG_CURSOR_EN			0x00000001
+#define MDP4_DMA_CURSOR_BLEND_CONFIG_FORMAT__MASK		0x00000006
+#define MDP4_DMA_CURSOR_BLEND_CONFIG_FORMAT__SHIFT		1
+static inline uint32_t MDP4_DMA_CURSOR_BLEND_CONFIG_FORMAT(enum mdp4_cursor_format val)
+{
+	return ((val) << MDP4_DMA_CURSOR_BLEND_CONFIG_FORMAT__SHIFT) & MDP4_DMA_CURSOR_BLEND_CONFIG_FORMAT__MASK;
+}
+#define MDP4_DMA_CURSOR_BLEND_CONFIG_TRANSP_EN			0x00000008
+
+static inline uint32_t REG_MDP4_DMA_CURSOR_BLEND_PARAM(enum mdp4_dma i0) { return 0x00000064 + __offset_DMA(i0); }
+
+static inline uint32_t REG_MDP4_DMA_BLEND_TRANS_LOW(enum mdp4_dma i0) { return 0x00000068 + __offset_DMA(i0); }
+
+static inline uint32_t REG_MDP4_DMA_BLEND_TRANS_HIGH(enum mdp4_dma i0) { return 0x0000006c + __offset_DMA(i0); }
+
+static inline uint32_t REG_MDP4_DMA_FETCH_CONFIG(enum mdp4_dma i0) { return 0x00001004 + __offset_DMA(i0); }
+
+static inline uint32_t REG_MDP4_DMA_CSC(enum mdp4_dma i0) { return 0x00003000 + __offset_DMA(i0); }
+
+
+static inline uint32_t REG_MDP4_DMA_CSC_MV(enum mdp4_dma i0, uint32_t i1) { return 0x00003400 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_MV_VAL(enum mdp4_dma i0, uint32_t i1) { return 0x00003400 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_PRE_BV(enum mdp4_dma i0, uint32_t i1) { return 0x00003500 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_PRE_BV_VAL(enum mdp4_dma i0, uint32_t i1) { return 0x00003500 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_POST_BV(enum mdp4_dma i0, uint32_t i1) { return 0x00003580 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_POST_BV_VAL(enum mdp4_dma i0, uint32_t i1) { return 0x00003580 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_PRE_LV(enum mdp4_dma i0, uint32_t i1) { return 0x00003600 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_PRE_LV_VAL(enum mdp4_dma i0, uint32_t i1) { return 0x00003600 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_POST_LV(enum mdp4_dma i0, uint32_t i1) { return 0x00003680 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_DMA_CSC_POST_LV_VAL(enum mdp4_dma i0, uint32_t i1) { return 0x00003680 + __offset_DMA(i0) + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE(enum mpd4_pipe i0) { return 0x00020000 + 0x10000*i0; }
+
+static inline uint32_t REG_MDP4_PIPE_SRC_SIZE(enum mpd4_pipe i0) { return 0x00020000 + 0x10000*i0; }
+#define MDP4_PIPE_SRC_SIZE_HEIGHT__MASK				0xffff0000
+#define MDP4_PIPE_SRC_SIZE_HEIGHT__SHIFT			16
+static inline uint32_t MDP4_PIPE_SRC_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_SIZE_HEIGHT__SHIFT) & MDP4_PIPE_SRC_SIZE_HEIGHT__MASK;
+}
+#define MDP4_PIPE_SRC_SIZE_WIDTH__MASK				0x0000ffff
+#define MDP4_PIPE_SRC_SIZE_WIDTH__SHIFT				0
+static inline uint32_t MDP4_PIPE_SRC_SIZE_WIDTH(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_SIZE_WIDTH__SHIFT) & MDP4_PIPE_SRC_SIZE_WIDTH__MASK;
+}
+
+static inline uint32_t REG_MDP4_PIPE_SRC_XY(enum mpd4_pipe i0) { return 0x00020004 + 0x10000*i0; }
+#define MDP4_PIPE_SRC_XY_Y__MASK				0xffff0000
+#define MDP4_PIPE_SRC_XY_Y__SHIFT				16
+static inline uint32_t MDP4_PIPE_SRC_XY_Y(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_XY_Y__SHIFT) & MDP4_PIPE_SRC_XY_Y__MASK;
+}
+#define MDP4_PIPE_SRC_XY_X__MASK				0x0000ffff
+#define MDP4_PIPE_SRC_XY_X__SHIFT				0
+static inline uint32_t MDP4_PIPE_SRC_XY_X(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_XY_X__SHIFT) & MDP4_PIPE_SRC_XY_X__MASK;
+}
+
+static inline uint32_t REG_MDP4_PIPE_DST_SIZE(enum mpd4_pipe i0) { return 0x00020008 + 0x10000*i0; }
+#define MDP4_PIPE_DST_SIZE_HEIGHT__MASK				0xffff0000
+#define MDP4_PIPE_DST_SIZE_HEIGHT__SHIFT			16
+static inline uint32_t MDP4_PIPE_DST_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_DST_SIZE_HEIGHT__SHIFT) & MDP4_PIPE_DST_SIZE_HEIGHT__MASK;
+}
+#define MDP4_PIPE_DST_SIZE_WIDTH__MASK				0x0000ffff
+#define MDP4_PIPE_DST_SIZE_WIDTH__SHIFT				0
+static inline uint32_t MDP4_PIPE_DST_SIZE_WIDTH(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_DST_SIZE_WIDTH__SHIFT) & MDP4_PIPE_DST_SIZE_WIDTH__MASK;
+}
+
+static inline uint32_t REG_MDP4_PIPE_DST_XY(enum mpd4_pipe i0) { return 0x0002000c + 0x10000*i0; }
+#define MDP4_PIPE_DST_XY_Y__MASK				0xffff0000
+#define MDP4_PIPE_DST_XY_Y__SHIFT				16
+static inline uint32_t MDP4_PIPE_DST_XY_Y(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_DST_XY_Y__SHIFT) & MDP4_PIPE_DST_XY_Y__MASK;
+}
+#define MDP4_PIPE_DST_XY_X__MASK				0x0000ffff
+#define MDP4_PIPE_DST_XY_X__SHIFT				0
+static inline uint32_t MDP4_PIPE_DST_XY_X(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_DST_XY_X__SHIFT) & MDP4_PIPE_DST_XY_X__MASK;
+}
+
+static inline uint32_t REG_MDP4_PIPE_SRCP0_BASE(enum mpd4_pipe i0) { return 0x00020010 + 0x10000*i0; }
+
+static inline uint32_t REG_MDP4_PIPE_SRCP1_BASE(enum mpd4_pipe i0) { return 0x00020014 + 0x10000*i0; }
+
+static inline uint32_t REG_MDP4_PIPE_SRCP2_BASE(enum mpd4_pipe i0) { return 0x00020018 + 0x10000*i0; }
+
+static inline uint32_t REG_MDP4_PIPE_SRC_STRIDE_A(enum mpd4_pipe i0) { return 0x00020040 + 0x10000*i0; }
+#define MDP4_PIPE_SRC_STRIDE_A_P0__MASK				0x0000ffff
+#define MDP4_PIPE_SRC_STRIDE_A_P0__SHIFT			0
+static inline uint32_t MDP4_PIPE_SRC_STRIDE_A_P0(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_STRIDE_A_P0__SHIFT) & MDP4_PIPE_SRC_STRIDE_A_P0__MASK;
+}
+#define MDP4_PIPE_SRC_STRIDE_A_P1__MASK				0xffff0000
+#define MDP4_PIPE_SRC_STRIDE_A_P1__SHIFT			16
+static inline uint32_t MDP4_PIPE_SRC_STRIDE_A_P1(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_STRIDE_A_P1__SHIFT) & MDP4_PIPE_SRC_STRIDE_A_P1__MASK;
+}
+
+static inline uint32_t REG_MDP4_PIPE_SRC_STRIDE_B(enum mpd4_pipe i0) { return 0x00020044 + 0x10000*i0; }
+#define MDP4_PIPE_SRC_STRIDE_B_P2__MASK				0x0000ffff
+#define MDP4_PIPE_SRC_STRIDE_B_P2__SHIFT			0
+static inline uint32_t MDP4_PIPE_SRC_STRIDE_B_P2(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_STRIDE_B_P2__SHIFT) & MDP4_PIPE_SRC_STRIDE_B_P2__MASK;
+}
+#define MDP4_PIPE_SRC_STRIDE_B_P3__MASK				0xffff0000
+#define MDP4_PIPE_SRC_STRIDE_B_P3__SHIFT			16
+static inline uint32_t MDP4_PIPE_SRC_STRIDE_B_P3(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_STRIDE_B_P3__SHIFT) & MDP4_PIPE_SRC_STRIDE_B_P3__MASK;
+}
+
+static inline uint32_t REG_MDP4_PIPE_FRAME_SIZE(enum mpd4_pipe i0) { return 0x00020048 + 0x10000*i0; }
+#define MDP4_PIPE_FRAME_SIZE_HEIGHT__MASK			0xffff0000
+#define MDP4_PIPE_FRAME_SIZE_HEIGHT__SHIFT			16
+static inline uint32_t MDP4_PIPE_FRAME_SIZE_HEIGHT(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_FRAME_SIZE_HEIGHT__SHIFT) & MDP4_PIPE_FRAME_SIZE_HEIGHT__MASK;
+}
+#define MDP4_PIPE_FRAME_SIZE_WIDTH__MASK			0x0000ffff
+#define MDP4_PIPE_FRAME_SIZE_WIDTH__SHIFT			0
+static inline uint32_t MDP4_PIPE_FRAME_SIZE_WIDTH(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_FRAME_SIZE_WIDTH__SHIFT) & MDP4_PIPE_FRAME_SIZE_WIDTH__MASK;
+}
+
+static inline uint32_t REG_MDP4_PIPE_SRC_FORMAT(enum mpd4_pipe i0) { return 0x00020050 + 0x10000*i0; }
+#define MDP4_PIPE_SRC_FORMAT_G_BPC__MASK			0x00000003
+#define MDP4_PIPE_SRC_FORMAT_G_BPC__SHIFT			0
+static inline uint32_t MDP4_PIPE_SRC_FORMAT_G_BPC(enum mpd4_bpc val)
+{
+	return ((val) << MDP4_PIPE_SRC_FORMAT_G_BPC__SHIFT) & MDP4_PIPE_SRC_FORMAT_G_BPC__MASK;
+}
+#define MDP4_PIPE_SRC_FORMAT_B_BPC__MASK			0x0000000c
+#define MDP4_PIPE_SRC_FORMAT_B_BPC__SHIFT			2
+static inline uint32_t MDP4_PIPE_SRC_FORMAT_B_BPC(enum mpd4_bpc val)
+{
+	return ((val) << MDP4_PIPE_SRC_FORMAT_B_BPC__SHIFT) & MDP4_PIPE_SRC_FORMAT_B_BPC__MASK;
+}
+#define MDP4_PIPE_SRC_FORMAT_R_BPC__MASK			0x00000030
+#define MDP4_PIPE_SRC_FORMAT_R_BPC__SHIFT			4
+static inline uint32_t MDP4_PIPE_SRC_FORMAT_R_BPC(enum mpd4_bpc val)
+{
+	return ((val) << MDP4_PIPE_SRC_FORMAT_R_BPC__SHIFT) & MDP4_PIPE_SRC_FORMAT_R_BPC__MASK;
+}
+#define MDP4_PIPE_SRC_FORMAT_A_BPC__MASK			0x000000c0
+#define MDP4_PIPE_SRC_FORMAT_A_BPC__SHIFT			6
+static inline uint32_t MDP4_PIPE_SRC_FORMAT_A_BPC(enum mpd4_bpc_alpha val)
+{
+	return ((val) << MDP4_PIPE_SRC_FORMAT_A_BPC__SHIFT) & MDP4_PIPE_SRC_FORMAT_A_BPC__MASK;
+}
+#define MDP4_PIPE_SRC_FORMAT_ALPHA_ENABLE			0x00000100
+#define MDP4_PIPE_SRC_FORMAT_CPP__MASK				0x00000600
+#define MDP4_PIPE_SRC_FORMAT_CPP__SHIFT				9
+static inline uint32_t MDP4_PIPE_SRC_FORMAT_CPP(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_FORMAT_CPP__SHIFT) & MDP4_PIPE_SRC_FORMAT_CPP__MASK;
+}
+#define MDP4_PIPE_SRC_FORMAT_ROTATED_90				0x00001000
+#define MDP4_PIPE_SRC_FORMAT_UNPACK_COUNT__MASK			0x00006000
+#define MDP4_PIPE_SRC_FORMAT_UNPACK_COUNT__SHIFT		13
+static inline uint32_t MDP4_PIPE_SRC_FORMAT_UNPACK_COUNT(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_FORMAT_UNPACK_COUNT__SHIFT) & MDP4_PIPE_SRC_FORMAT_UNPACK_COUNT__MASK;
+}
+#define MDP4_PIPE_SRC_FORMAT_UNPACK_TIGHT			0x00020000
+#define MDP4_PIPE_SRC_FORMAT_UNPACK_ALIGN_MSB			0x00040000
+#define MDP4_PIPE_SRC_FORMAT_SOLID_FILL				0x00400000
+
+static inline uint32_t REG_MDP4_PIPE_SRC_UNPACK(enum mpd4_pipe i0) { return 0x00020054 + 0x10000*i0; }
+#define MDP4_PIPE_SRC_UNPACK_ELEM0__MASK			0x000000ff
+#define MDP4_PIPE_SRC_UNPACK_ELEM0__SHIFT			0
+static inline uint32_t MDP4_PIPE_SRC_UNPACK_ELEM0(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_UNPACK_ELEM0__SHIFT) & MDP4_PIPE_SRC_UNPACK_ELEM0__MASK;
+}
+#define MDP4_PIPE_SRC_UNPACK_ELEM1__MASK			0x0000ff00
+#define MDP4_PIPE_SRC_UNPACK_ELEM1__SHIFT			8
+static inline uint32_t MDP4_PIPE_SRC_UNPACK_ELEM1(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_UNPACK_ELEM1__SHIFT) & MDP4_PIPE_SRC_UNPACK_ELEM1__MASK;
+}
+#define MDP4_PIPE_SRC_UNPACK_ELEM2__MASK			0x00ff0000
+#define MDP4_PIPE_SRC_UNPACK_ELEM2__SHIFT			16
+static inline uint32_t MDP4_PIPE_SRC_UNPACK_ELEM2(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_UNPACK_ELEM2__SHIFT) & MDP4_PIPE_SRC_UNPACK_ELEM2__MASK;
+}
+#define MDP4_PIPE_SRC_UNPACK_ELEM3__MASK			0xff000000
+#define MDP4_PIPE_SRC_UNPACK_ELEM3__SHIFT			24
+static inline uint32_t MDP4_PIPE_SRC_UNPACK_ELEM3(uint32_t val)
+{
+	return ((val) << MDP4_PIPE_SRC_UNPACK_ELEM3__SHIFT) & MDP4_PIPE_SRC_UNPACK_ELEM3__MASK;
+}
+
+static inline uint32_t REG_MDP4_PIPE_OP_MODE(enum mpd4_pipe i0) { return 0x00020058 + 0x10000*i0; }
+#define MDP4_PIPE_OP_MODE_SCALEX_EN				0x00000001
+#define MDP4_PIPE_OP_MODE_SCALEY_EN				0x00000002
+#define MDP4_PIPE_OP_MODE_SRC_YCBCR				0x00000200
+#define MDP4_PIPE_OP_MODE_DST_YCBCR				0x00000400
+#define MDP4_PIPE_OP_MODE_CSC_EN				0x00000800
+#define MDP4_PIPE_OP_MODE_FLIP_LR				0x00002000
+#define MDP4_PIPE_OP_MODE_FLIP_UD				0x00004000
+#define MDP4_PIPE_OP_MODE_DITHER_EN				0x00008000
+#define MDP4_PIPE_OP_MODE_IGC_LUT_EN				0x00010000
+#define MDP4_PIPE_OP_MODE_DEINT_EN				0x00040000
+#define MDP4_PIPE_OP_MODE_DEINT_ODD_REF				0x00080000
+
+static inline uint32_t REG_MDP4_PIPE_PHASEX_STEP(enum mpd4_pipe i0) { return 0x0002005c + 0x10000*i0; }
+
+static inline uint32_t REG_MDP4_PIPE_PHASEY_STEP(enum mpd4_pipe i0) { return 0x00020060 + 0x10000*i0; }
+
+static inline uint32_t REG_MDP4_PIPE_FETCH_CONFIG(enum mpd4_pipe i0) { return 0x00021004 + 0x10000*i0; }
+
+static inline uint32_t REG_MDP4_PIPE_SOLID_COLOR(enum mpd4_pipe i0) { return 0x00021008 + 0x10000*i0; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC(enum mpd4_pipe i0) { return 0x00024000 + 0x10000*i0; }
+
+
+static inline uint32_t REG_MDP4_PIPE_CSC_MV(enum mpd4_pipe i0, uint32_t i1) { return 0x00024400 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_MV_VAL(enum mpd4_pipe i0, uint32_t i1) { return 0x00024400 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_PRE_BV(enum mpd4_pipe i0, uint32_t i1) { return 0x00024500 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_PRE_BV_VAL(enum mpd4_pipe i0, uint32_t i1) { return 0x00024500 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_POST_BV(enum mpd4_pipe i0, uint32_t i1) { return 0x00024580 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_POST_BV_VAL(enum mpd4_pipe i0, uint32_t i1) { return 0x00024580 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_PRE_LV(enum mpd4_pipe i0, uint32_t i1) { return 0x00024600 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_PRE_LV_VAL(enum mpd4_pipe i0, uint32_t i1) { return 0x00024600 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_POST_LV(enum mpd4_pipe i0, uint32_t i1) { return 0x00024680 + 0x10000*i0 + 0x4*i1; }
+
+static inline uint32_t REG_MDP4_PIPE_CSC_POST_LV_VAL(enum mpd4_pipe i0, uint32_t i1) { return 0x00024680 + 0x10000*i0 + 0x4*i1; }
+
+#define REG_MDP4_LCDC						0x000c0000
+
+#define REG_MDP4_LCDC_ENABLE					0x000c0000
+
+#define REG_MDP4_LCDC_HSYNC_CTRL				0x000c0004
+#define MDP4_LCDC_HSYNC_CTRL_PULSEW__MASK			0x0000ffff
+#define MDP4_LCDC_HSYNC_CTRL_PULSEW__SHIFT			0
+static inline uint32_t MDP4_LCDC_HSYNC_CTRL_PULSEW(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_HSYNC_CTRL_PULSEW__SHIFT) & MDP4_LCDC_HSYNC_CTRL_PULSEW__MASK;
+}
+#define MDP4_LCDC_HSYNC_CTRL_PERIOD__MASK			0xffff0000
+#define MDP4_LCDC_HSYNC_CTRL_PERIOD__SHIFT			16
+static inline uint32_t MDP4_LCDC_HSYNC_CTRL_PERIOD(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_HSYNC_CTRL_PERIOD__SHIFT) & MDP4_LCDC_HSYNC_CTRL_PERIOD__MASK;
+}
+
+#define REG_MDP4_LCDC_VSYNC_PERIOD				0x000c0008
+
+#define REG_MDP4_LCDC_VSYNC_LEN					0x000c000c
+
+#define REG_MDP4_LCDC_DISPLAY_HCTRL				0x000c0010
+#define MDP4_LCDC_DISPLAY_HCTRL_START__MASK			0x0000ffff
+#define MDP4_LCDC_DISPLAY_HCTRL_START__SHIFT			0
+static inline uint32_t MDP4_LCDC_DISPLAY_HCTRL_START(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_DISPLAY_HCTRL_START__SHIFT) & MDP4_LCDC_DISPLAY_HCTRL_START__MASK;
+}
+#define MDP4_LCDC_DISPLAY_HCTRL_END__MASK			0xffff0000
+#define MDP4_LCDC_DISPLAY_HCTRL_END__SHIFT			16
+static inline uint32_t MDP4_LCDC_DISPLAY_HCTRL_END(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_DISPLAY_HCTRL_END__SHIFT) & MDP4_LCDC_DISPLAY_HCTRL_END__MASK;
+}
+
+#define REG_MDP4_LCDC_DISPLAY_VSTART				0x000c0014
+
+#define REG_MDP4_LCDC_DISPLAY_VEND				0x000c0018
+
+#define REG_MDP4_LCDC_ACTIVE_HCTL				0x000c001c
+#define MDP4_LCDC_ACTIVE_HCTL_START__MASK			0x00007fff
+#define MDP4_LCDC_ACTIVE_HCTL_START__SHIFT			0
+static inline uint32_t MDP4_LCDC_ACTIVE_HCTL_START(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_ACTIVE_HCTL_START__SHIFT) & MDP4_LCDC_ACTIVE_HCTL_START__MASK;
+}
+#define MDP4_LCDC_ACTIVE_HCTL_END__MASK				0x7fff0000
+#define MDP4_LCDC_ACTIVE_HCTL_END__SHIFT			16
+static inline uint32_t MDP4_LCDC_ACTIVE_HCTL_END(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_ACTIVE_HCTL_END__SHIFT) & MDP4_LCDC_ACTIVE_HCTL_END__MASK;
+}
+#define MDP4_LCDC_ACTIVE_HCTL_ACTIVE_START_X			0x80000000
+
+#define REG_MDP4_LCDC_ACTIVE_VSTART				0x000c0020
+
+#define REG_MDP4_LCDC_ACTIVE_VEND				0x000c0024
+
+#define REG_MDP4_LCDC_BORDER_CLR				0x000c0028
+
+#define REG_MDP4_LCDC_UNDERFLOW_CLR				0x000c002c
+#define MDP4_LCDC_UNDERFLOW_CLR_COLOR__MASK			0x00ffffff
+#define MDP4_LCDC_UNDERFLOW_CLR_COLOR__SHIFT			0
+static inline uint32_t MDP4_LCDC_UNDERFLOW_CLR_COLOR(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_UNDERFLOW_CLR_COLOR__SHIFT) & MDP4_LCDC_UNDERFLOW_CLR_COLOR__MASK;
+}
+#define MDP4_LCDC_UNDERFLOW_CLR_ENABLE_RECOVERY			0x80000000
+
+#define REG_MDP4_LCDC_HSYNC_SKEW				0x000c0030
+
+#define REG_MDP4_LCDC_TEST_CNTL					0x000c0034
+
+#define REG_MDP4_LCDC_CTRL_POLARITY				0x000c0038
+#define MDP4_LCDC_CTRL_POLARITY_HSYNC_LOW			0x00000001
+#define MDP4_LCDC_CTRL_POLARITY_VSYNC_LOW			0x00000002
+#define MDP4_LCDC_CTRL_POLARITY_DATA_EN_LOW			0x00000004
+
+#define REG_MDP4_DTV						0x000d0000
+
+#define REG_MDP4_DTV_ENABLE					0x000d0000
+
+#define REG_MDP4_DTV_HSYNC_CTRL					0x000d0004
+#define MDP4_DTV_HSYNC_CTRL_PULSEW__MASK			0x0000ffff
+#define MDP4_DTV_HSYNC_CTRL_PULSEW__SHIFT			0
+static inline uint32_t MDP4_DTV_HSYNC_CTRL_PULSEW(uint32_t val)
+{
+	return ((val) << MDP4_DTV_HSYNC_CTRL_PULSEW__SHIFT) & MDP4_DTV_HSYNC_CTRL_PULSEW__MASK;
+}
+#define MDP4_DTV_HSYNC_CTRL_PERIOD__MASK			0xffff0000
+#define MDP4_DTV_HSYNC_CTRL_PERIOD__SHIFT			16
+static inline uint32_t MDP4_DTV_HSYNC_CTRL_PERIOD(uint32_t val)
+{
+	return ((val) << MDP4_DTV_HSYNC_CTRL_PERIOD__SHIFT) & MDP4_DTV_HSYNC_CTRL_PERIOD__MASK;
+}
+
+#define REG_MDP4_DTV_VSYNC_PERIOD				0x000d0008
+
+#define REG_MDP4_DTV_VSYNC_LEN					0x000d000c
+
+#define REG_MDP4_DTV_DISPLAY_HCTRL				0x000d0018
+#define MDP4_DTV_DISPLAY_HCTRL_START__MASK			0x0000ffff
+#define MDP4_DTV_DISPLAY_HCTRL_START__SHIFT			0
+static inline uint32_t MDP4_DTV_DISPLAY_HCTRL_START(uint32_t val)
+{
+	return ((val) << MDP4_DTV_DISPLAY_HCTRL_START__SHIFT) & MDP4_DTV_DISPLAY_HCTRL_START__MASK;
+}
+#define MDP4_DTV_DISPLAY_HCTRL_END__MASK			0xffff0000
+#define MDP4_DTV_DISPLAY_HCTRL_END__SHIFT			16
+static inline uint32_t MDP4_DTV_DISPLAY_HCTRL_END(uint32_t val)
+{
+	return ((val) << MDP4_DTV_DISPLAY_HCTRL_END__SHIFT) & MDP4_DTV_DISPLAY_HCTRL_END__MASK;
+}
+
+#define REG_MDP4_DTV_DISPLAY_VSTART				0x000d001c
+
+#define REG_MDP4_DTV_DISPLAY_VEND				0x000d0020
+
+#define REG_MDP4_DTV_ACTIVE_HCTL				0x000d002c
+#define MDP4_DTV_ACTIVE_HCTL_START__MASK			0x00007fff
+#define MDP4_DTV_ACTIVE_HCTL_START__SHIFT			0
+static inline uint32_t MDP4_DTV_ACTIVE_HCTL_START(uint32_t val)
+{
+	return ((val) << MDP4_DTV_ACTIVE_HCTL_START__SHIFT) & MDP4_DTV_ACTIVE_HCTL_START__MASK;
+}
+#define MDP4_DTV_ACTIVE_HCTL_END__MASK				0x7fff0000
+#define MDP4_DTV_ACTIVE_HCTL_END__SHIFT				16
+static inline uint32_t MDP4_DTV_ACTIVE_HCTL_END(uint32_t val)
+{
+	return ((val) << MDP4_DTV_ACTIVE_HCTL_END__SHIFT) & MDP4_DTV_ACTIVE_HCTL_END__MASK;
+}
+#define MDP4_DTV_ACTIVE_HCTL_ACTIVE_START_X			0x80000000
+
+#define REG_MDP4_DTV_ACTIVE_VSTART				0x000d0030
+
+#define REG_MDP4_DTV_ACTIVE_VEND				0x000d0038
+
+#define REG_MDP4_DTV_BORDER_CLR					0x000d0040
+
+#define REG_MDP4_DTV_UNDERFLOW_CLR				0x000d0044
+#define MDP4_DTV_UNDERFLOW_CLR_COLOR__MASK			0x00ffffff
+#define MDP4_DTV_UNDERFLOW_CLR_COLOR__SHIFT			0
+static inline uint32_t MDP4_DTV_UNDERFLOW_CLR_COLOR(uint32_t val)
+{
+	return ((val) << MDP4_DTV_UNDERFLOW_CLR_COLOR__SHIFT) & MDP4_DTV_UNDERFLOW_CLR_COLOR__MASK;
+}
+#define MDP4_DTV_UNDERFLOW_CLR_ENABLE_RECOVERY			0x80000000
+
+#define REG_MDP4_DTV_HSYNC_SKEW					0x000d0048
+
+#define REG_MDP4_DTV_TEST_CNTL					0x000d004c
+
+#define REG_MDP4_DTV_CTRL_POLARITY				0x000d0050
+#define MDP4_DTV_CTRL_POLARITY_HSYNC_LOW			0x00000001
+#define MDP4_DTV_CTRL_POLARITY_VSYNC_LOW			0x00000002
+#define MDP4_DTV_CTRL_POLARITY_DATA_EN_LOW			0x00000004
+
+#define REG_MDP4_DSI						0x000e0000
+
+#define REG_MDP4_DSI_ENABLE					0x000e0000
+
+#define REG_MDP4_DSI_HSYNC_CTRL					0x000e0004
+#define MDP4_DSI_HSYNC_CTRL_PULSEW__MASK			0x0000ffff
+#define MDP4_DSI_HSYNC_CTRL_PULSEW__SHIFT			0
+static inline uint32_t MDP4_DSI_HSYNC_CTRL_PULSEW(uint32_t val)
+{
+	return ((val) << MDP4_DSI_HSYNC_CTRL_PULSEW__SHIFT) & MDP4_DSI_HSYNC_CTRL_PULSEW__MASK;
+}
+#define MDP4_DSI_HSYNC_CTRL_PERIOD__MASK			0xffff0000
+#define MDP4_DSI_HSYNC_CTRL_PERIOD__SHIFT			16
+static inline uint32_t MDP4_DSI_HSYNC_CTRL_PERIOD(uint32_t val)
+{
+	return ((val) << MDP4_DSI_HSYNC_CTRL_PERIOD__SHIFT) & MDP4_DSI_HSYNC_CTRL_PERIOD__MASK;
+}
+
+#define REG_MDP4_DSI_VSYNC_PERIOD				0x000e0008
+
+#define REG_MDP4_DSI_VSYNC_LEN					0x000e000c
+
+#define REG_MDP4_DSI_DISPLAY_HCTRL				0x000e0010
+#define MDP4_DSI_DISPLAY_HCTRL_START__MASK			0x0000ffff
+#define MDP4_DSI_DISPLAY_HCTRL_START__SHIFT			0
+static inline uint32_t MDP4_DSI_DISPLAY_HCTRL_START(uint32_t val)
+{
+	return ((val) << MDP4_DSI_DISPLAY_HCTRL_START__SHIFT) & MDP4_DSI_DISPLAY_HCTRL_START__MASK;
+}
+#define MDP4_DSI_DISPLAY_HCTRL_END__MASK			0xffff0000
+#define MDP4_DSI_DISPLAY_HCTRL_END__SHIFT			16
+static inline uint32_t MDP4_DSI_DISPLAY_HCTRL_END(uint32_t val)
+{
+	return ((val) << MDP4_DSI_DISPLAY_HCTRL_END__SHIFT) & MDP4_DSI_DISPLAY_HCTRL_END__MASK;
+}
+
+#define REG_MDP4_DSI_DISPLAY_VSTART				0x000e0014
+
+#define REG_MDP4_DSI_DISPLAY_VEND				0x000e0018
+
+#define REG_MDP4_DSI_ACTIVE_HCTL				0x000e001c
+#define MDP4_DSI_ACTIVE_HCTL_START__MASK			0x00007fff
+#define MDP4_DSI_ACTIVE_HCTL_START__SHIFT			0
+static inline uint32_t MDP4_DSI_ACTIVE_HCTL_START(uint32_t val)
+{
+	return ((val) << MDP4_DSI_ACTIVE_HCTL_START__SHIFT) & MDP4_DSI_ACTIVE_HCTL_START__MASK;
+}
+#define MDP4_DSI_ACTIVE_HCTL_END__MASK				0x7fff0000
+#define MDP4_DSI_ACTIVE_HCTL_END__SHIFT				16
+static inline uint32_t MDP4_DSI_ACTIVE_HCTL_END(uint32_t val)
+{
+	return ((val) << MDP4_DSI_ACTIVE_HCTL_END__SHIFT) & MDP4_DSI_ACTIVE_HCTL_END__MASK;
+}
+#define MDP4_DSI_ACTIVE_HCTL_ACTIVE_START_X			0x80000000
+
+#define REG_MDP4_DSI_ACTIVE_VSTART				0x000e0020
+
+#define REG_MDP4_DSI_ACTIVE_VEND				0x000e0024
+
+#define REG_MDP4_DSI_BORDER_CLR					0x000e0028
+
+#define REG_MDP4_DSI_UNDERFLOW_CLR				0x000e002c
+#define MDP4_DSI_UNDERFLOW_CLR_COLOR__MASK			0x00ffffff
+#define MDP4_DSI_UNDERFLOW_CLR_COLOR__SHIFT			0
+static inline uint32_t MDP4_DSI_UNDERFLOW_CLR_COLOR(uint32_t val)
+{
+	return ((val) << MDP4_DSI_UNDERFLOW_CLR_COLOR__SHIFT) & MDP4_DSI_UNDERFLOW_CLR_COLOR__MASK;
+}
+#define MDP4_DSI_UNDERFLOW_CLR_ENABLE_RECOVERY			0x80000000
+
+#define REG_MDP4_DSI_HSYNC_SKEW					0x000e0030
+
+#define REG_MDP4_DSI_TEST_CNTL					0x000e0034
+
+#define REG_MDP4_DSI_CTRL_POLARITY				0x000e0038
+#define MDP4_DSI_CTRL_POLARITY_HSYNC_LOW			0x00000001
+#define MDP4_DSI_CTRL_POLARITY_VSYNC_LOW			0x00000002
+#define MDP4_DSI_CTRL_POLARITY_DATA_EN_LOW			0x00000004
+
+
+#endif /* MDP4_XML */
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp4/mdp4_crtc.c
new file mode 100644
index 0000000..de6bea2
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_crtc.c
@@ -0,0 +1,685 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mdp4_kms.h"
+
+#include <drm/drm_mode.h>
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+#include "drm_flip_work.h"
+
+struct mdp4_crtc {
+	struct drm_crtc base;
+	char name[8];
+	struct drm_plane *plane;
+	int id;
+	int ovlp;
+	enum mdp4_dma dma;
+	bool enabled;
+
+	/* which mixer/encoder we route output to: */
+	int mixer;
+
+	struct {
+		spinlock_t lock;
+		bool stale;
+		uint32_t width, height;
+
+		/* next cursor to scan-out: */
+		uint32_t next_iova;
+		struct drm_gem_object *next_bo;
+
+		/* current cursor being scanned out: */
+		struct drm_gem_object *scanout_bo;
+	} cursor;
+
+
+	/* if there is a pending flip, these will be non-null: */
+	struct drm_pending_vblank_event *event;
+	struct work_struct pageflip_work;
+
+	/* the fb that we currently hold a scanout ref to: */
+	struct drm_framebuffer *fb;
+
+	/* for unref'ing framebuffers after scanout completes: */
+	struct drm_flip_work unref_fb_work;
+
+	/* for unref'ing cursor bo's after scanout completes: */
+	struct drm_flip_work unref_cursor_work;
+
+	struct mdp4_irq vblank;
+	struct mdp4_irq err;
+};
+#define to_mdp4_crtc(x) container_of(x, struct mdp4_crtc, base)
+
+static struct mdp4_kms *get_kms(struct drm_crtc *crtc)
+{
+	struct msm_drm_private *priv = crtc->dev->dev_private;
+	return to_mdp4_kms(priv->kms);
+}
+
+static void update_fb(struct drm_crtc *crtc, bool async,
+		struct drm_framebuffer *new_fb)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct drm_framebuffer *old_fb = mdp4_crtc->fb;
+
+	if (old_fb)
+		drm_flip_work_queue(&mdp4_crtc->unref_fb_work, old_fb);
+
+	/* grab reference to incoming scanout fb: */
+	drm_framebuffer_reference(new_fb);
+	mdp4_crtc->base.fb = new_fb;
+	mdp4_crtc->fb = new_fb;
+
+	if (!async) {
+		/* enable vblank to pick up the old_fb */
+		mdp4_irq_register(get_kms(crtc), &mdp4_crtc->vblank);
+	}
+}
+
+static void complete_flip(struct drm_crtc *crtc, bool canceled)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_pending_vblank_event *event;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->event_lock, flags);
+	event = mdp4_crtc->event;
+	if (event) {
+		mdp4_crtc->event = NULL;
+		if (canceled)
+			event->base.destroy(&event->base);
+		else
+			drm_send_vblank_event(dev, mdp4_crtc->id, event);
+	}
+	spin_unlock_irqrestore(&dev->event_lock, flags);
+}
+
+static void crtc_flush(struct drm_crtc *crtc)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	uint32_t flush = 0;
+
+	flush |= pipe2flush(mdp4_plane_pipe(mdp4_crtc->plane));
+	flush |= ovlp2flush(mdp4_crtc->ovlp);
+
+	DBG("%s: flush=%08x", mdp4_crtc->name, flush);
+
+	mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush);
+}
+
+static void pageflip_worker(struct work_struct *work)
+{
+	struct mdp4_crtc *mdp4_crtc =
+		container_of(work, struct mdp4_crtc, pageflip_work);
+	struct drm_crtc *crtc = &mdp4_crtc->base;
+
+	mdp4_plane_set_scanout(mdp4_crtc->plane, crtc->fb);
+	crtc_flush(crtc);
+
+	/* enable vblank to complete flip: */
+	mdp4_irq_register(get_kms(crtc), &mdp4_crtc->vblank);
+}
+
+static void unref_fb_worker(struct drm_flip_work *work, void *val)
+{
+	struct mdp4_crtc *mdp4_crtc =
+		container_of(work, struct mdp4_crtc, unref_fb_work);
+	struct drm_device *dev = mdp4_crtc->base.dev;
+
+	mutex_lock(&dev->mode_config.mutex);
+	drm_framebuffer_unreference(val);
+	mutex_unlock(&dev->mode_config.mutex);
+}
+
+static void unref_cursor_worker(struct drm_flip_work *work, void *val)
+{
+	struct mdp4_crtc *mdp4_crtc =
+		container_of(work, struct mdp4_crtc, unref_cursor_work);
+	struct mdp4_kms *mdp4_kms = get_kms(&mdp4_crtc->base);
+
+	msm_gem_put_iova(val, mdp4_kms->id);
+	drm_gem_object_unreference_unlocked(val);
+}
+
+static void mdp4_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+
+	mdp4_crtc->plane->funcs->destroy(mdp4_crtc->plane);
+
+	drm_crtc_cleanup(crtc);
+	drm_flip_work_cleanup(&mdp4_crtc->unref_fb_work);
+	drm_flip_work_cleanup(&mdp4_crtc->unref_cursor_work);
+
+	kfree(mdp4_crtc);
+}
+
+static void mdp4_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	bool enabled = (mode == DRM_MODE_DPMS_ON);
+
+	DBG("%s: mode=%d", mdp4_crtc->name, mode);
+
+	if (enabled != mdp4_crtc->enabled) {
+		if (enabled) {
+			mdp4_enable(mdp4_kms);
+			mdp4_irq_register(mdp4_kms, &mdp4_crtc->err);
+		} else {
+			mdp4_irq_unregister(mdp4_kms, &mdp4_crtc->err);
+			mdp4_disable(mdp4_kms);
+		}
+		mdp4_crtc->enabled = enabled;
+	}
+}
+
+static bool mdp4_crtc_mode_fixup(struct drm_crtc *crtc,
+		const struct drm_display_mode *mode,
+		struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void blend_setup(struct drm_crtc *crtc)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	int i, ovlp = mdp4_crtc->ovlp;
+	uint32_t mixer_cfg = 0;
+
+	/*
+	 * This probably would also need to be triggered by any attached
+	 * plane when it changes.. for now since we are only using a single
+	 * private plane, the configuration is hard-coded:
+	 */
+
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_TRANSP_LOW0(ovlp), 0);
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_TRANSP_LOW1(ovlp), 0);
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_TRANSP_HIGH0(ovlp), 0);
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_TRANSP_HIGH1(ovlp), 0);
+
+	for (i = 0; i < 4; i++) {
+		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_FG_ALPHA(ovlp, i), 0);
+		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_BG_ALPHA(ovlp, i), 0);
+		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_OP(ovlp, i),
+				MDP4_OVLP_STAGE_OP_FG_ALPHA(FG_CONST) |
+				MDP4_OVLP_STAGE_OP_BG_ALPHA(BG_CONST));
+		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_CO3(ovlp, i), 0);
+		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_TRANSP_LOW0(ovlp, i), 0);
+		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_TRANSP_LOW1(ovlp, i), 0);
+		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_TRANSP_HIGH0(ovlp, i), 0);
+		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_TRANSP_HIGH1(ovlp, i), 0);
+	}
+
+	/* TODO single register for all CRTCs, so this won't work properly
+	 * when multiple CRTCs are active..
+	 */
+	switch (mdp4_plane_pipe(mdp4_crtc->plane)) {
+	case VG1:
+		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE0(STAGE_BASE) |
+			COND(mdp4_crtc->mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE0_MIXER1);
+		break;
+	case VG2:
+		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE1(STAGE_BASE) |
+			COND(mdp4_crtc->mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE1_MIXER1);
+		break;
+	case RGB1:
+		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE2(STAGE_BASE) |
+			COND(mdp4_crtc->mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE2_MIXER1);
+		break;
+	case RGB2:
+		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE3(STAGE_BASE) |
+			COND(mdp4_crtc->mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE3_MIXER1);
+		break;
+	case RGB3:
+		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE4(STAGE_BASE) |
+			COND(mdp4_crtc->mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE4_MIXER1);
+		break;
+	case VG3:
+		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE5(STAGE_BASE) |
+			COND(mdp4_crtc->mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE5_MIXER1);
+		break;
+	case VG4:
+		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE6(STAGE_BASE) |
+			COND(mdp4_crtc->mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE6_MIXER1);
+		break;
+	default:
+		WARN_ON("invalid pipe");
+		break;
+	}
+	mdp4_write(mdp4_kms, REG_MDP4_LAYERMIXER_IN_CFG, mixer_cfg);
+}
+
+static int mdp4_crtc_mode_set(struct drm_crtc *crtc,
+		struct drm_display_mode *mode,
+		struct drm_display_mode *adjusted_mode,
+		int x, int y,
+		struct drm_framebuffer *old_fb)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	enum mdp4_dma dma = mdp4_crtc->dma;
+	int ret, ovlp = mdp4_crtc->ovlp;
+
+	mode = adjusted_mode;
+
+	DBG("%s: set mode: %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x",
+			mdp4_crtc->name, mode->base.id, mode->name,
+			mode->vrefresh, mode->clock,
+			mode->hdisplay, mode->hsync_start,
+			mode->hsync_end, mode->htotal,
+			mode->vdisplay, mode->vsync_start,
+			mode->vsync_end, mode->vtotal,
+			mode->type, mode->flags);
+
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_SRC_SIZE(dma),
+			MDP4_DMA_SRC_SIZE_WIDTH(mode->hdisplay) |
+			MDP4_DMA_SRC_SIZE_HEIGHT(mode->vdisplay));
+
+	/* take data from pipe: */
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_SRC_BASE(dma), 0);
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_SRC_STRIDE(dma),
+			crtc->fb->pitches[0]);
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_DST_SIZE(dma),
+			MDP4_DMA_DST_SIZE_WIDTH(0) |
+			MDP4_DMA_DST_SIZE_HEIGHT(0));
+
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_BASE(ovlp), 0);
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_SIZE(ovlp),
+			MDP4_OVLP_SIZE_WIDTH(mode->hdisplay) |
+			MDP4_OVLP_SIZE_HEIGHT(mode->vdisplay));
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_STRIDE(ovlp),
+			crtc->fb->pitches[0]);
+
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_CFG(ovlp), 1);
+
+	update_fb(crtc, false, crtc->fb);
+
+	ret = mdp4_plane_mode_set(mdp4_crtc->plane, crtc, crtc->fb,
+			0, 0, mode->hdisplay, mode->vdisplay,
+			x << 16, y << 16,
+			mode->hdisplay << 16, mode->vdisplay << 16);
+	if (ret) {
+		dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n",
+				mdp4_crtc->name, ret);
+		return ret;
+	}
+
+	if (dma == DMA_E) {
+		mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(0), 0x00ff0000);
+		mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(1), 0x00ff0000);
+		mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(2), 0x00ff0000);
+	}
+
+	return 0;
+}
+
+static void mdp4_crtc_prepare(struct drm_crtc *crtc)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	DBG("%s", mdp4_crtc->name);
+	/* make sure we hold a ref to mdp clks while setting up mode: */
+	mdp4_enable(get_kms(crtc));
+	mdp4_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+}
+
+static void mdp4_crtc_commit(struct drm_crtc *crtc)
+{
+	mdp4_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
+	crtc_flush(crtc);
+	/* drop the ref to mdp clk's that we got in prepare: */
+	mdp4_disable(get_kms(crtc));
+}
+
+static int mdp4_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+		struct drm_framebuffer *old_fb)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct drm_plane *plane = mdp4_crtc->plane;
+	struct drm_display_mode *mode = &crtc->mode;
+
+	update_fb(crtc, false, crtc->fb);
+
+	return mdp4_plane_mode_set(plane, crtc, crtc->fb,
+			0, 0, mode->hdisplay, mode->vdisplay,
+			x << 16, y << 16,
+			mode->hdisplay << 16, mode->vdisplay << 16);
+}
+
+static void mdp4_crtc_load_lut(struct drm_crtc *crtc)
+{
+}
+
+static int mdp4_crtc_page_flip(struct drm_crtc *crtc,
+		struct drm_framebuffer *new_fb,
+		struct drm_pending_vblank_event *event,
+		uint32_t page_flip_flags)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_gem_object *obj;
+
+	if (mdp4_crtc->event) {
+		dev_err(dev->dev, "already pending flip!\n");
+		return -EBUSY;
+	}
+
+	obj = msm_framebuffer_bo(new_fb, 0);
+
+	mdp4_crtc->event = event;
+	update_fb(crtc, true, new_fb);
+
+	return msm_gem_queue_inactive_work(obj,
+			&mdp4_crtc->pageflip_work);
+}
+
+static int mdp4_crtc_set_property(struct drm_crtc *crtc,
+		struct drm_property *property, uint64_t val)
+{
+	// XXX
+	return -EINVAL;
+}
+
+#define CURSOR_WIDTH 64
+#define CURSOR_HEIGHT 64
+
+/* called from IRQ to update cursor related registers (if needed).  The
+ * cursor registers, other than x/y position, appear not to be double
+ * buffered, and changing them other than from vblank seems to trigger
+ * underflow.
+ */
+static void update_cursor(struct drm_crtc *crtc)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	enum mdp4_dma dma = mdp4_crtc->dma;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mdp4_crtc->cursor.lock, flags);
+	if (mdp4_crtc->cursor.stale) {
+		struct mdp4_kms *mdp4_kms = get_kms(crtc);
+		struct drm_gem_object *next_bo = mdp4_crtc->cursor.next_bo;
+		struct drm_gem_object *prev_bo = mdp4_crtc->cursor.scanout_bo;
+		uint32_t iova = mdp4_crtc->cursor.next_iova;
+
+		if (next_bo) {
+			/* take a obj ref + iova ref when we start scanning out: */
+			drm_gem_object_reference(next_bo);
+			msm_gem_get_iova_locked(next_bo, mdp4_kms->id, &iova);
+
+			/* enable cursor: */
+			mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_SIZE(dma),
+					MDP4_DMA_CURSOR_SIZE_WIDTH(mdp4_crtc->cursor.width) |
+					MDP4_DMA_CURSOR_SIZE_HEIGHT(mdp4_crtc->cursor.height));
+			mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_BASE(dma), iova);
+			mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_BLEND_CONFIG(dma),
+					MDP4_DMA_CURSOR_BLEND_CONFIG_FORMAT(CURSOR_ARGB) |
+					MDP4_DMA_CURSOR_BLEND_CONFIG_CURSOR_EN);
+		} else {
+			/* disable cursor: */
+			mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_BASE(dma), 0);
+			mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_BLEND_CONFIG(dma),
+					MDP4_DMA_CURSOR_BLEND_CONFIG_FORMAT(CURSOR_ARGB));
+		}
+
+		/* and drop the iova ref + obj rev when done scanning out: */
+		if (prev_bo)
+			drm_flip_work_queue(&mdp4_crtc->unref_cursor_work, prev_bo);
+
+		mdp4_crtc->cursor.scanout_bo = next_bo;
+		mdp4_crtc->cursor.stale = false;
+	}
+	spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags);
+}
+
+static int mdp4_crtc_cursor_set(struct drm_crtc *crtc,
+		struct drm_file *file_priv, uint32_t handle,
+		uint32_t width, uint32_t height)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_gem_object *cursor_bo, *old_bo;
+	unsigned long flags;
+	uint32_t iova;
+	int ret;
+
+	if ((width > CURSOR_WIDTH) || (height > CURSOR_HEIGHT)) {
+		dev_err(dev->dev, "bad cursor size: %dx%d\n", width, height);
+		return -EINVAL;
+	}
+
+	if (handle) {
+		cursor_bo = drm_gem_object_lookup(dev, file_priv, handle);
+		if (!cursor_bo)
+			return -ENOENT;
+	} else {
+		cursor_bo = NULL;
+	}
+
+	if (cursor_bo) {
+		ret = msm_gem_get_iova(cursor_bo, mdp4_kms->id, &iova);
+		if (ret)
+			goto fail;
+	} else {
+		iova = 0;
+	}
+
+	spin_lock_irqsave(&mdp4_crtc->cursor.lock, flags);
+	old_bo = mdp4_crtc->cursor.next_bo;
+	mdp4_crtc->cursor.next_bo   = cursor_bo;
+	mdp4_crtc->cursor.next_iova = iova;
+	mdp4_crtc->cursor.width     = width;
+	mdp4_crtc->cursor.height    = height;
+	mdp4_crtc->cursor.stale     = true;
+	spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags);
+
+	if (old_bo) {
+		/* drop our previous reference: */
+		msm_gem_put_iova(old_bo, mdp4_kms->id);
+		drm_gem_object_unreference_unlocked(old_bo);
+	}
+
+	return 0;
+
+fail:
+	drm_gem_object_unreference_unlocked(cursor_bo);
+	return ret;
+}
+
+static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	enum mdp4_dma dma = mdp4_crtc->dma;
+
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_POS(dma),
+			MDP4_DMA_CURSOR_POS_X(x) |
+			MDP4_DMA_CURSOR_POS_Y(y));
+
+	return 0;
+}
+
+static const struct drm_crtc_funcs mdp4_crtc_funcs = {
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = mdp4_crtc_destroy,
+	.page_flip = mdp4_crtc_page_flip,
+	.set_property = mdp4_crtc_set_property,
+	.cursor_set = mdp4_crtc_cursor_set,
+	.cursor_move = mdp4_crtc_cursor_move,
+};
+
+static const struct drm_crtc_helper_funcs mdp4_crtc_helper_funcs = {
+	.dpms = mdp4_crtc_dpms,
+	.mode_fixup = mdp4_crtc_mode_fixup,
+	.mode_set = mdp4_crtc_mode_set,
+	.prepare = mdp4_crtc_prepare,
+	.commit = mdp4_crtc_commit,
+	.mode_set_base = mdp4_crtc_mode_set_base,
+	.load_lut = mdp4_crtc_load_lut,
+};
+
+static void mdp4_crtc_vblank_irq(struct mdp4_irq *irq, uint32_t irqstatus)
+{
+	struct mdp4_crtc *mdp4_crtc = container_of(irq, struct mdp4_crtc, vblank);
+	struct drm_crtc *crtc = &mdp4_crtc->base;
+	struct msm_drm_private *priv = crtc->dev->dev_private;
+
+	update_cursor(crtc);
+	complete_flip(crtc, false);
+	mdp4_irq_unregister(get_kms(crtc), &mdp4_crtc->vblank);
+
+	drm_flip_work_commit(&mdp4_crtc->unref_fb_work, priv->wq);
+	drm_flip_work_commit(&mdp4_crtc->unref_cursor_work, priv->wq);
+}
+
+static void mdp4_crtc_err_irq(struct mdp4_irq *irq, uint32_t irqstatus)
+{
+	struct mdp4_crtc *mdp4_crtc = container_of(irq, struct mdp4_crtc, err);
+	struct drm_crtc *crtc = &mdp4_crtc->base;
+	DBG("%s: error: %08x", mdp4_crtc->name, irqstatus);
+	crtc_flush(crtc);
+}
+
+uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	return mdp4_crtc->vblank.irqmask;
+}
+
+void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc)
+{
+	complete_flip(crtc, true);
+}
+
+/* set dma config, ie. the format the encoder wants. */
+void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_CONFIG(mdp4_crtc->dma), config);
+}
+
+/* set interface for routing crtc->encoder: */
+void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	uint32_t intf_sel;
+
+	intf_sel = mdp4_read(mdp4_kms, REG_MDP4_DISP_INTF_SEL);
+
+	switch (mdp4_crtc->dma) {
+	case DMA_P:
+		intf_sel &= ~MDP4_DISP_INTF_SEL_PRIM__MASK;
+		intf_sel |= MDP4_DISP_INTF_SEL_PRIM(intf);
+		break;
+	case DMA_S:
+		intf_sel &= ~MDP4_DISP_INTF_SEL_SEC__MASK;
+		intf_sel |= MDP4_DISP_INTF_SEL_SEC(intf);
+		break;
+	case DMA_E:
+		intf_sel &= ~MDP4_DISP_INTF_SEL_EXT__MASK;
+		intf_sel |= MDP4_DISP_INTF_SEL_EXT(intf);
+		break;
+	}
+
+	if (intf == INTF_DSI_VIDEO) {
+		intf_sel &= ~MDP4_DISP_INTF_SEL_DSI_CMD;
+		intf_sel |= MDP4_DISP_INTF_SEL_DSI_VIDEO;
+		mdp4_crtc->mixer = 0;
+	} else if (intf == INTF_DSI_CMD) {
+		intf_sel &= ~MDP4_DISP_INTF_SEL_DSI_VIDEO;
+		intf_sel |= MDP4_DISP_INTF_SEL_DSI_CMD;
+		mdp4_crtc->mixer = 0;
+	} else if (intf == INTF_LCDC_DTV){
+		mdp4_crtc->mixer = 1;
+	}
+
+	blend_setup(crtc);
+
+	DBG("%s: intf_sel=%08x", mdp4_crtc->name, intf_sel);
+
+	mdp4_write(mdp4_kms, REG_MDP4_DISP_INTF_SEL, intf_sel);
+}
+
+static const char *dma_names[] = {
+		"DMA_P", "DMA_S", "DMA_E",
+};
+
+/* initialize crtc */
+struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
+		struct drm_plane *plane, int id, int ovlp_id,
+		enum mdp4_dma dma_id)
+{
+	struct drm_crtc *crtc = NULL;
+	struct mdp4_crtc *mdp4_crtc;
+	int ret;
+
+	mdp4_crtc = kzalloc(sizeof(*mdp4_crtc), GFP_KERNEL);
+	if (!mdp4_crtc) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	crtc = &mdp4_crtc->base;
+
+	mdp4_crtc->plane = plane;
+	mdp4_crtc->plane->crtc = crtc;
+
+	mdp4_crtc->ovlp = ovlp_id;
+	mdp4_crtc->dma = dma_id;
+
+	mdp4_crtc->vblank.irqmask = dma2irq(mdp4_crtc->dma);
+	mdp4_crtc->vblank.irq = mdp4_crtc_vblank_irq;
+
+	mdp4_crtc->err.irqmask = dma2err(mdp4_crtc->dma);
+	mdp4_crtc->err.irq = mdp4_crtc_err_irq;
+
+	snprintf(mdp4_crtc->name, sizeof(mdp4_crtc->name), "%s:%d",
+			dma_names[dma_id], ovlp_id);
+
+	spin_lock_init(&mdp4_crtc->cursor.lock);
+
+	ret = drm_flip_work_init(&mdp4_crtc->unref_fb_work, 16,
+			"unref fb", unref_fb_worker);
+	if (ret)
+		goto fail;
+
+	ret = drm_flip_work_init(&mdp4_crtc->unref_cursor_work, 64,
+			"unref cursor", unref_cursor_worker);
+
+	INIT_WORK(&mdp4_crtc->pageflip_work, pageflip_worker);
+
+	drm_crtc_init(dev, crtc, &mdp4_crtc_funcs);
+	drm_crtc_helper_add(crtc, &mdp4_crtc_helper_funcs);
+
+	mdp4_plane_install_properties(mdp4_crtc->plane, &crtc->base);
+
+	return crtc;
+
+fail:
+	if (crtc)
+		mdp4_crtc_destroy(crtc);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/mdp4/mdp4_dtv_encoder.c
new file mode 100644
index 0000000..5e0dcae
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_dtv_encoder.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <mach/clk.h>
+
+#include "mdp4_kms.h"
+
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+
+struct mdp4_dtv_encoder {
+	struct drm_encoder base;
+	struct clk *src_clk;
+	struct clk *hdmi_clk;
+	struct clk *mdp_clk;
+	unsigned long int pixclock;
+	bool enabled;
+	uint32_t bsc;
+};
+#define to_mdp4_dtv_encoder(x) container_of(x, struct mdp4_dtv_encoder, base)
+
+static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
+{
+	struct msm_drm_private *priv = encoder->dev->dev_private;
+	return to_mdp4_kms(priv->kms);
+}
+
+#ifdef CONFIG_MSM_BUS_SCALING
+#include <mach/board.h>
+/* not ironically named at all.. no, really.. */
+static void bs_init(struct mdp4_dtv_encoder *mdp4_dtv_encoder)
+{
+	struct drm_device *dev = mdp4_dtv_encoder->base.dev;
+	struct lcdc_platform_data *dtv_pdata = mdp4_find_pdata("dtv.0");
+
+	if (!dtv_pdata) {
+		dev_err(dev->dev, "could not find dtv pdata\n");
+		return;
+	}
+
+	if (dtv_pdata->bus_scale_table) {
+		mdp4_dtv_encoder->bsc = msm_bus_scale_register_client(
+				dtv_pdata->bus_scale_table);
+		DBG("bus scale client: %08x", mdp4_dtv_encoder->bsc);
+		DBG("lcdc_power_save: %p", dtv_pdata->lcdc_power_save);
+		if (dtv_pdata->lcdc_power_save)
+			dtv_pdata->lcdc_power_save(1);
+	}
+}
+
+static void bs_fini(struct mdp4_dtv_encoder *mdp4_dtv_encoder)
+{
+	if (mdp4_dtv_encoder->bsc) {
+		msm_bus_scale_unregister_client(mdp4_dtv_encoder->bsc);
+		mdp4_dtv_encoder->bsc = 0;
+	}
+}
+
+static void bs_set(struct mdp4_dtv_encoder *mdp4_dtv_encoder, int idx)
+{
+	if (mdp4_dtv_encoder->bsc) {
+		DBG("set bus scaling: %d", idx);
+		msm_bus_scale_client_update_request(mdp4_dtv_encoder->bsc, idx);
+	}
+}
+#else
+static void bs_init(struct mdp4_dtv_encoder *mdp4_dtv_encoder) {}
+static void bs_fini(struct mdp4_dtv_encoder *mdp4_dtv_encoder) {}
+static void bs_set(struct mdp4_dtv_encoder *mdp4_dtv_encoder, int idx) {}
+#endif
+
+static void mdp4_dtv_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder);
+	bs_fini(mdp4_dtv_encoder);
+	drm_encoder_cleanup(encoder);
+	kfree(mdp4_dtv_encoder);
+}
+
+static const struct drm_encoder_funcs mdp4_dtv_encoder_funcs = {
+	.destroy = mdp4_dtv_encoder_destroy,
+};
+
+static void mdp4_dtv_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder);
+	struct mdp4_kms *mdp4_kms = get_kms(encoder);
+	bool enabled = (mode == DRM_MODE_DPMS_ON);
+
+	DBG("mode=%d", mode);
+
+	if (enabled == mdp4_dtv_encoder->enabled)
+		return;
+
+	if (enabled) {
+		unsigned long pc = mdp4_dtv_encoder->pixclock;
+		int ret;
+
+		bs_set(mdp4_dtv_encoder, 1);
+
+		DBG("setting src_clk=%lu", pc);
+
+		ret = clk_set_rate(mdp4_dtv_encoder->src_clk, pc);
+		if (ret)
+			dev_err(dev->dev, "failed to set src_clk to %lu: %d\n", pc, ret);
+		clk_prepare_enable(mdp4_dtv_encoder->src_clk);
+		ret = clk_prepare_enable(mdp4_dtv_encoder->hdmi_clk);
+		if (ret)
+			dev_err(dev->dev, "failed to enable hdmi_clk: %d\n", ret);
+		ret = clk_prepare_enable(mdp4_dtv_encoder->mdp_clk);
+		if (ret)
+			dev_err(dev->dev, "failed to enabled mdp_clk: %d\n", ret);
+
+		mdp4_write(mdp4_kms, REG_MDP4_DTV_ENABLE, 1);
+	} else {
+		mdp4_write(mdp4_kms, REG_MDP4_DTV_ENABLE, 0);
+
+		/*
+		 * Wait for a vsync so we know the ENABLE=0 latched before
+		 * the (connector) source of the vsync's gets disabled,
+		 * otherwise we end up in a funny state if we re-enable
+		 * before the disable latches, which results that some of
+		 * the settings changes for the new modeset (like new
+		 * scanout buffer) don't latch properly..
+		 */
+		mdp4_irq_wait(mdp4_kms, MDP4_IRQ_EXTERNAL_VSYNC);
+
+		clk_disable_unprepare(mdp4_dtv_encoder->src_clk);
+		clk_disable_unprepare(mdp4_dtv_encoder->hdmi_clk);
+		clk_disable_unprepare(mdp4_dtv_encoder->mdp_clk);
+
+		bs_set(mdp4_dtv_encoder, 0);
+	}
+
+	mdp4_dtv_encoder->enabled = enabled;
+}
+
+static bool mdp4_dtv_encoder_mode_fixup(struct drm_encoder *encoder,
+		const struct drm_display_mode *mode,
+		struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void mdp4_dtv_encoder_mode_set(struct drm_encoder *encoder,
+		struct drm_display_mode *mode,
+		struct drm_display_mode *adjusted_mode)
+{
+	struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder);
+	struct mdp4_kms *mdp4_kms = get_kms(encoder);
+	uint32_t dtv_hsync_skew, vsync_period, vsync_len, ctrl_pol;
+	uint32_t display_v_start, display_v_end;
+	uint32_t hsync_start_x, hsync_end_x;
+
+	mode = adjusted_mode;
+
+	DBG("set mode: %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x",
+			mode->base.id, mode->name,
+			mode->vrefresh, mode->clock,
+			mode->hdisplay, mode->hsync_start,
+			mode->hsync_end, mode->htotal,
+			mode->vdisplay, mode->vsync_start,
+			mode->vsync_end, mode->vtotal,
+			mode->type, mode->flags);
+
+	mdp4_dtv_encoder->pixclock = mode->clock * 1000;
+
+	DBG("pixclock=%lu", mdp4_dtv_encoder->pixclock);
+
+	ctrl_pol = 0;
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		ctrl_pol |= MDP4_DTV_CTRL_POLARITY_HSYNC_LOW;
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		ctrl_pol |= MDP4_DTV_CTRL_POLARITY_VSYNC_LOW;
+	/* probably need to get DATA_EN polarity from panel.. */
+
+	dtv_hsync_skew = 0;  /* get this from panel? */
+
+	hsync_start_x = (mode->htotal - mode->hsync_start);
+	hsync_end_x = mode->htotal - (mode->hsync_start - mode->hdisplay) - 1;
+
+	vsync_period = mode->vtotal * mode->htotal;
+	vsync_len = (mode->vsync_end - mode->vsync_start) * mode->htotal;
+	display_v_start = (mode->vtotal - mode->vsync_start) * mode->htotal + dtv_hsync_skew;
+	display_v_end = vsync_period - ((mode->vsync_start - mode->vdisplay) * mode->htotal) + dtv_hsync_skew - 1;
+
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_HSYNC_CTRL,
+			MDP4_DTV_HSYNC_CTRL_PULSEW(mode->hsync_end - mode->hsync_start) |
+			MDP4_DTV_HSYNC_CTRL_PERIOD(mode->htotal));
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_VSYNC_PERIOD, vsync_period);
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_VSYNC_LEN, vsync_len);
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_DISPLAY_HCTRL,
+			MDP4_DTV_DISPLAY_HCTRL_START(hsync_start_x) |
+			MDP4_DTV_DISPLAY_HCTRL_END(hsync_end_x));
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_DISPLAY_VSTART, display_v_start);
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_DISPLAY_VEND, display_v_end);
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_BORDER_CLR, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_UNDERFLOW_CLR,
+			MDP4_DTV_UNDERFLOW_CLR_ENABLE_RECOVERY |
+			MDP4_DTV_UNDERFLOW_CLR_COLOR(0xff));
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_HSYNC_SKEW, dtv_hsync_skew);
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_CTRL_POLARITY, ctrl_pol);
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_ACTIVE_HCTL,
+			MDP4_DTV_ACTIVE_HCTL_START(0) |
+			MDP4_DTV_ACTIVE_HCTL_END(0));
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_ACTIVE_VSTART, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_ACTIVE_VEND, 0);
+}
+
+static void mdp4_dtv_encoder_prepare(struct drm_encoder *encoder)
+{
+	mdp4_dtv_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void mdp4_dtv_encoder_commit(struct drm_encoder *encoder)
+{
+	mdp4_crtc_set_config(encoder->crtc,
+			MDP4_DMA_CONFIG_R_BPC(BPC8) |
+			MDP4_DMA_CONFIG_G_BPC(BPC8) |
+			MDP4_DMA_CONFIG_B_BPC(BPC8) |
+			MDP4_DMA_CONFIG_PACK(0x21));
+	mdp4_crtc_set_intf(encoder->crtc, INTF_LCDC_DTV);
+	mdp4_dtv_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+}
+
+static const struct drm_encoder_helper_funcs mdp4_dtv_encoder_helper_funcs = {
+	.dpms = mdp4_dtv_encoder_dpms,
+	.mode_fixup = mdp4_dtv_encoder_mode_fixup,
+	.mode_set = mdp4_dtv_encoder_mode_set,
+	.prepare = mdp4_dtv_encoder_prepare,
+	.commit = mdp4_dtv_encoder_commit,
+};
+
+long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate)
+{
+	struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder);
+	return clk_round_rate(mdp4_dtv_encoder->src_clk, rate);
+}
+
+/* initialize encoder */
+struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev)
+{
+	struct drm_encoder *encoder = NULL;
+	struct mdp4_dtv_encoder *mdp4_dtv_encoder;
+	int ret;
+
+	mdp4_dtv_encoder = kzalloc(sizeof(*mdp4_dtv_encoder), GFP_KERNEL);
+	if (!mdp4_dtv_encoder) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	encoder = &mdp4_dtv_encoder->base;
+
+	drm_encoder_init(dev, encoder, &mdp4_dtv_encoder_funcs,
+			 DRM_MODE_ENCODER_TMDS);
+	drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs);
+
+	mdp4_dtv_encoder->src_clk = devm_clk_get(dev->dev, "src_clk");
+	if (IS_ERR(mdp4_dtv_encoder->src_clk)) {
+		dev_err(dev->dev, "failed to get src_clk\n");
+		ret = PTR_ERR(mdp4_dtv_encoder->src_clk);
+		goto fail;
+	}
+
+	mdp4_dtv_encoder->hdmi_clk = devm_clk_get(dev->dev, "hdmi_clk");
+	if (IS_ERR(mdp4_dtv_encoder->hdmi_clk)) {
+		dev_err(dev->dev, "failed to get hdmi_clk\n");
+		ret = PTR_ERR(mdp4_dtv_encoder->hdmi_clk);
+		goto fail;
+	}
+
+	mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "mdp_clk");
+	if (IS_ERR(mdp4_dtv_encoder->mdp_clk)) {
+		dev_err(dev->dev, "failed to get mdp_clk\n");
+		ret = PTR_ERR(mdp4_dtv_encoder->mdp_clk);
+		goto fail;
+	}
+
+	bs_init(mdp4_dtv_encoder);
+
+	return encoder;
+
+fail:
+	if (encoder)
+		mdp4_dtv_encoder_destroy(encoder);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_format.c b/drivers/gpu/drm/msm/mdp4/mdp4_format.c
new file mode 100644
index 0000000..7b645f2
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_format.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "msm_drv.h"
+#include "mdp4_kms.h"
+
+#define FMT(name, a, r, g, b, e0, e1, e2, e3, alpha, tight, c, cnt) { \
+		.base = { .pixel_format = DRM_FORMAT_ ## name }, \
+		.bpc_a = BPC ## a ## A,                          \
+		.bpc_r = BPC ## r,                               \
+		.bpc_g = BPC ## g,                               \
+		.bpc_b = BPC ## b,                               \
+		.unpack = { e0, e1, e2, e3 },                    \
+		.alpha_enable = alpha,                           \
+		.unpack_tight = tight,                           \
+		.cpp = c,                                        \
+		.unpack_count = cnt,                             \
+	}
+
+#define BPC0A 0
+
+static const struct mdp4_format formats[] = {
+	/*  name      a  r  g  b   e0 e1 e2 e3  alpha   tight  cpp cnt */
+	FMT(ARGB8888, 8, 8, 8, 8,  1, 0, 2, 3,  true,   true,  4,  4),
+	FMT(XRGB8888, 8, 8, 8, 8,  1, 0, 2, 3,  false,  true,  4,  4),
+	FMT(RGB888,   0, 8, 8, 8,  1, 0, 2, 0,  false,  true,  3,  3),
+	FMT(BGR888,   0, 8, 8, 8,  2, 0, 1, 0,  false,  true,  3,  3),
+	FMT(RGB565,   0, 5, 6, 5,  1, 0, 2, 0,  false,  true,  2,  3),
+	FMT(BGR565,   0, 5, 6, 5,  2, 0, 1, 0,  false,  true,  2,  3),
+};
+
+const struct msm_format *mdp4_get_format(struct msm_kms *kms, uint32_t format)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(formats); i++) {
+		const struct mdp4_format *f = &formats[i];
+		if (f->base.pixel_format == format)
+			return &f->base;
+	}
+	return NULL;
+}
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_irq.c b/drivers/gpu/drm/msm/mdp4/mdp4_irq.c
new file mode 100644
index 0000000..5c6b7fc
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_irq.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "msm_drv.h"
+#include "mdp4_kms.h"
+
+
+struct mdp4_irq_wait {
+	struct mdp4_irq irq;
+	int count;
+};
+
+static DECLARE_WAIT_QUEUE_HEAD(wait_event);
+
+static DEFINE_SPINLOCK(list_lock);
+
+static void update_irq(struct mdp4_kms *mdp4_kms)
+{
+	struct mdp4_irq *irq;
+	uint32_t irqmask = mdp4_kms->vblank_mask;
+
+	BUG_ON(!spin_is_locked(&list_lock));
+
+	list_for_each_entry(irq, &mdp4_kms->irq_list, node)
+		irqmask |= irq->irqmask;
+
+	mdp4_write(mdp4_kms, REG_MDP4_INTR_ENABLE, irqmask);
+}
+
+static void update_irq_unlocked(struct mdp4_kms *mdp4_kms)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&list_lock, flags);
+	update_irq(mdp4_kms);
+	spin_unlock_irqrestore(&list_lock, flags);
+}
+
+static void mdp4_irq_error_handler(struct mdp4_irq *irq, uint32_t irqstatus)
+{
+	DRM_ERROR("errors: %08x\n", irqstatus);
+}
+
+void mdp4_irq_preinstall(struct msm_kms *kms)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	mdp4_write(mdp4_kms, REG_MDP4_INTR_CLEAR, 0xffffffff);
+}
+
+int mdp4_irq_postinstall(struct msm_kms *kms)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	struct mdp4_irq *error_handler = &mdp4_kms->error_handler;
+
+	INIT_LIST_HEAD(&mdp4_kms->irq_list);
+
+	error_handler->irq = mdp4_irq_error_handler;
+	error_handler->irqmask = MDP4_IRQ_PRIMARY_INTF_UDERRUN |
+			MDP4_IRQ_EXTERNAL_INTF_UDERRUN;
+
+	mdp4_irq_register(mdp4_kms, error_handler);
+
+	return 0;
+}
+
+void mdp4_irq_uninstall(struct msm_kms *kms)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	mdp4_write(mdp4_kms, REG_MDP4_INTR_ENABLE, 0x00000000);
+}
+
+irqreturn_t mdp4_irq(struct msm_kms *kms)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	struct drm_device *dev = mdp4_kms->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct mdp4_irq *handler, *n;
+	unsigned long flags;
+	unsigned int id;
+	uint32_t status;
+
+	status = mdp4_read(mdp4_kms, REG_MDP4_INTR_STATUS);
+	mdp4_write(mdp4_kms, REG_MDP4_INTR_CLEAR, status);
+
+	VERB("status=%08x", status);
+
+	for (id = 0; id < priv->num_crtcs; id++)
+		if (status & mdp4_crtc_vblank(priv->crtcs[id]))
+			drm_handle_vblank(dev, id);
+
+	spin_lock_irqsave(&list_lock, flags);
+	mdp4_kms->in_irq = true;
+	list_for_each_entry_safe(handler, n, &mdp4_kms->irq_list, node) {
+		if (handler->irqmask & status) {
+			spin_unlock_irqrestore(&list_lock, flags);
+			handler->irq(handler, handler->irqmask & status);
+			spin_lock_irqsave(&list_lock, flags);
+		}
+	}
+	mdp4_kms->in_irq = false;
+	update_irq(mdp4_kms);
+	spin_unlock_irqrestore(&list_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+int mdp4_enable_vblank(struct msm_kms *kms, struct drm_crtc *crtc)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	unsigned long flags;
+
+	spin_lock_irqsave(&list_lock, flags);
+	mdp4_kms->vblank_mask |= mdp4_crtc_vblank(crtc);
+	update_irq(mdp4_kms);
+	spin_unlock_irqrestore(&list_lock, flags);
+
+	return 0;
+}
+
+void mdp4_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	unsigned long flags;
+
+	spin_lock_irqsave(&list_lock, flags);
+	mdp4_kms->vblank_mask &= ~mdp4_crtc_vblank(crtc);
+	update_irq(mdp4_kms);
+	spin_unlock_irqrestore(&list_lock, flags);
+}
+
+static void wait_irq(struct mdp4_irq *irq, uint32_t irqstatus)
+{
+	struct mdp4_irq_wait *wait =
+			container_of(irq, struct mdp4_irq_wait, irq);
+	wait->count--;
+	wake_up_all(&wait_event);
+}
+
+void mdp4_irq_wait(struct mdp4_kms *mdp4_kms, uint32_t irqmask)
+{
+	struct mdp4_irq_wait wait = {
+		.irq = {
+			.irq = wait_irq,
+			.irqmask = irqmask,
+		},
+		.count = 1,
+	};
+	mdp4_irq_register(mdp4_kms, &wait.irq);
+	wait_event(wait_event, (wait.count <= 0));
+	mdp4_irq_unregister(mdp4_kms, &wait.irq);
+}
+
+void mdp4_irq_register(struct mdp4_kms *mdp4_kms, struct mdp4_irq *irq)
+{
+	unsigned long flags;
+	bool needs_update = false;
+
+	spin_lock_irqsave(&list_lock, flags);
+
+	if (!irq->registered) {
+		irq->registered = true;
+		list_add(&irq->node, &mdp4_kms->irq_list);
+		needs_update = !mdp4_kms->in_irq;
+	}
+
+	spin_unlock_irqrestore(&list_lock, flags);
+
+	if (needs_update)
+		update_irq_unlocked(mdp4_kms);
+}
+
+void mdp4_irq_unregister(struct mdp4_kms *mdp4_kms, struct mdp4_irq *irq)
+{
+	unsigned long flags;
+	bool needs_update = false;
+
+	spin_lock_irqsave(&list_lock, flags);
+
+	if (irq->registered) {
+		irq->registered = false;
+		list_del(&irq->node);
+		needs_update = !mdp4_kms->in_irq;
+	}
+
+	spin_unlock_irqrestore(&list_lock, flags);
+
+	if (needs_update)
+		update_irq_unlocked(mdp4_kms);
+}
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c
new file mode 100644
index 0000000..5db5bba
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_kms.c
@@ -0,0 +1,365 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include "msm_drv.h"
+#include "mdp4_kms.h"
+
+#include <mach/iommu.h>
+
+static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev);
+
+static int mdp4_hw_init(struct msm_kms *kms)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	struct drm_device *dev = mdp4_kms->dev;
+	uint32_t version, major, minor, dmap_cfg, vg_cfg;
+	unsigned long clk;
+	int ret = 0;
+
+	pm_runtime_get_sync(dev->dev);
+
+	version = mdp4_read(mdp4_kms, REG_MDP4_VERSION);
+
+	major = FIELD(version, MDP4_VERSION_MAJOR);
+	minor = FIELD(version, MDP4_VERSION_MINOR);
+
+	DBG("found MDP version v%d.%d", major, minor);
+
+	if (major != 4) {
+		dev_err(dev->dev, "unexpected MDP version: v%d.%d\n",
+				major, minor);
+		ret = -ENXIO;
+		goto out;
+	}
+
+	mdp4_kms->rev = minor;
+
+	if (mdp4_kms->dsi_pll_vdda) {
+		if ((mdp4_kms->rev == 2) || (mdp4_kms->rev == 4)) {
+			ret = regulator_set_voltage(mdp4_kms->dsi_pll_vdda,
+					1200000, 1200000);
+			if (ret) {
+				dev_err(dev->dev,
+					"failed to set dsi_pll_vdda voltage: %d\n", ret);
+				goto out;
+			}
+		}
+	}
+
+	if (mdp4_kms->dsi_pll_vddio) {
+		if (mdp4_kms->rev == 2) {
+			ret = regulator_set_voltage(mdp4_kms->dsi_pll_vddio,
+					1800000, 1800000);
+			if (ret) {
+				dev_err(dev->dev,
+					"failed to set dsi_pll_vddio voltage: %d\n", ret);
+				goto out;
+			}
+		}
+	}
+
+	if (mdp4_kms->rev > 1) {
+		mdp4_write(mdp4_kms, REG_MDP4_CS_CONTROLLER0, 0x0707ffff);
+		mdp4_write(mdp4_kms, REG_MDP4_CS_CONTROLLER1, 0x03073f3f);
+	}
+
+	mdp4_write(mdp4_kms, REG_MDP4_PORTMAP_MODE, 0x3);
+
+	/* max read pending cmd config, 3 pending requests: */
+	mdp4_write(mdp4_kms, REG_MDP4_READ_CNFG, 0x02222);
+
+	clk = clk_get_rate(mdp4_kms->clk);
+
+	if ((mdp4_kms->rev >= 1) || (clk >= 90000000)) {
+		dmap_cfg = 0x47;     /* 16 bytes-burst x 8 req */
+		vg_cfg = 0x47;       /* 16 bytes-burs x 8 req */
+	} else {
+		dmap_cfg = 0x27;     /* 8 bytes-burst x 8 req */
+		vg_cfg = 0x43;       /* 16 bytes-burst x 4 req */
+	}
+
+	DBG("fetch config: dmap=%02x, vg=%02x", dmap_cfg, vg_cfg);
+
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_FETCH_CONFIG(DMA_P), dmap_cfg);
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_FETCH_CONFIG(DMA_E), dmap_cfg);
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_FETCH_CONFIG(VG1), vg_cfg);
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_FETCH_CONFIG(VG2), vg_cfg);
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_FETCH_CONFIG(RGB1), vg_cfg);
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_FETCH_CONFIG(RGB2), vg_cfg);
+
+	if (mdp4_kms->rev >= 2)
+		mdp4_write(mdp4_kms, REG_MDP4_LAYERMIXER_IN_CFG_UPDATE_METHOD, 1);
+
+	/* disable CSC matrix / YUV by default: */
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_OP_MODE(VG1), 0);
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_OP_MODE(VG2), 0);
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_P_OP_MODE, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_S_OP_MODE, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_CSC_CONFIG(1), 0);
+	mdp4_write(mdp4_kms, REG_MDP4_OVLP_CSC_CONFIG(2), 0);
+
+	if (mdp4_kms->rev > 1)
+		mdp4_write(mdp4_kms, REG_MDP4_RESET_STATUS, 1);
+
+out:
+	pm_runtime_put_sync(dev->dev);
+
+	return ret;
+}
+
+static long mdp4_round_pixclk(struct msm_kms *kms, unsigned long rate,
+		struct drm_encoder *encoder)
+{
+	/* if we had >1 encoder, we'd need something more clever: */
+	return mdp4_dtv_round_pixclk(encoder, rate);
+}
+
+static void mdp4_preclose(struct msm_kms *kms, struct drm_file *file)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	struct msm_drm_private *priv = mdp4_kms->dev->dev_private;
+	unsigned i;
+
+	for (i = 0; i < priv->num_crtcs; i++)
+		mdp4_crtc_cancel_pending_flip(priv->crtcs[i]);
+}
+
+static void mdp4_destroy(struct msm_kms *kms)
+{
+	struct mdp4_kms *mdp4_kms = to_mdp4_kms(kms);
+	kfree(mdp4_kms);
+}
+
+static const struct msm_kms_funcs kms_funcs = {
+		.hw_init         = mdp4_hw_init,
+		.irq_preinstall  = mdp4_irq_preinstall,
+		.irq_postinstall = mdp4_irq_postinstall,
+		.irq_uninstall   = mdp4_irq_uninstall,
+		.irq             = mdp4_irq,
+		.enable_vblank   = mdp4_enable_vblank,
+		.disable_vblank  = mdp4_disable_vblank,
+		.get_format      = mdp4_get_format,
+		.round_pixclk    = mdp4_round_pixclk,
+		.preclose        = mdp4_preclose,
+		.destroy         = mdp4_destroy,
+};
+
+int mdp4_disable(struct mdp4_kms *mdp4_kms)
+{
+	DBG("");
+
+	clk_disable_unprepare(mdp4_kms->clk);
+	if (mdp4_kms->pclk)
+		clk_disable_unprepare(mdp4_kms->pclk);
+	clk_disable_unprepare(mdp4_kms->lut_clk);
+
+	return 0;
+}
+
+int mdp4_enable(struct mdp4_kms *mdp4_kms)
+{
+	DBG("");
+
+	clk_prepare_enable(mdp4_kms->clk);
+	if (mdp4_kms->pclk)
+		clk_prepare_enable(mdp4_kms->pclk);
+	clk_prepare_enable(mdp4_kms->lut_clk);
+
+	return 0;
+}
+
+static int modeset_init(struct mdp4_kms *mdp4_kms)
+{
+	struct drm_device *dev = mdp4_kms->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_plane *plane;
+	struct drm_crtc *crtc;
+	struct drm_encoder *encoder;
+	int ret;
+
+	/*
+	 *  NOTE: this is a bit simplistic until we add support
+	 * for more than just RGB1->DMA_E->DTV->HDMI
+	 */
+
+	/* the CRTCs get constructed with a private plane: */
+	plane = mdp4_plane_init(dev, RGB1, true);
+	if (IS_ERR(plane)) {
+		dev_err(dev->dev, "failed to construct plane for RGB1\n");
+		ret = PTR_ERR(plane);
+		goto fail;
+	}
+
+	crtc  = mdp4_crtc_init(dev, plane, priv->num_crtcs, 1, DMA_E);
+	if (IS_ERR(crtc)) {
+		dev_err(dev->dev, "failed to construct crtc for DMA_E\n");
+		ret = PTR_ERR(crtc);
+		goto fail;
+	}
+	priv->crtcs[priv->num_crtcs++] = crtc;
+
+	encoder = mdp4_dtv_encoder_init(dev);
+	if (IS_ERR(encoder)) {
+		dev_err(dev->dev, "failed to construct DTV encoder\n");
+		ret = PTR_ERR(encoder);
+		goto fail;
+	}
+	encoder->possible_crtcs = 0x1;     /* DTV can be hooked to DMA_E */
+	priv->encoders[priv->num_encoders++] = encoder;
+
+	ret = hdmi_init(dev, encoder);
+	if (ret) {
+		dev_err(dev->dev, "failed to initialize HDMI\n");
+		goto fail;
+	}
+
+	return 0;
+
+fail:
+	return ret;
+}
+
+static const char *iommu_ports[] = {
+		"mdp_port0_cb0", "mdp_port1_cb0",
+};
+
+struct msm_kms *mdp4_kms_init(struct drm_device *dev)
+{
+	struct platform_device *pdev = dev->platformdev;
+	struct mdp4_platform_config *config = mdp4_get_config(pdev);
+	struct mdp4_kms *mdp4_kms;
+	struct msm_kms *kms = NULL;
+	int ret;
+
+	mdp4_kms = kzalloc(sizeof(*mdp4_kms), GFP_KERNEL);
+	if (!mdp4_kms) {
+		dev_err(dev->dev, "failed to allocate kms\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	kms = &mdp4_kms->base;
+	kms->funcs = &kms_funcs;
+
+	mdp4_kms->dev = dev;
+
+	mdp4_kms->mmio = msm_ioremap(pdev, NULL, "MDP4");
+	if (IS_ERR(mdp4_kms->mmio)) {
+		ret = PTR_ERR(mdp4_kms->mmio);
+		goto fail;
+	}
+
+	mdp4_kms->dsi_pll_vdda = devm_regulator_get(&pdev->dev, "dsi_pll_vdda");
+	if (IS_ERR(mdp4_kms->dsi_pll_vdda))
+		mdp4_kms->dsi_pll_vdda = NULL;
+
+	mdp4_kms->dsi_pll_vddio = devm_regulator_get(&pdev->dev, "dsi_pll_vddio");
+	if (IS_ERR(mdp4_kms->dsi_pll_vddio))
+		mdp4_kms->dsi_pll_vddio = NULL;
+
+	mdp4_kms->vdd = devm_regulator_get(&pdev->dev, "vdd");
+	if (IS_ERR(mdp4_kms->vdd))
+		mdp4_kms->vdd = NULL;
+
+	if (mdp4_kms->vdd) {
+		ret = regulator_enable(mdp4_kms->vdd);
+		if (ret) {
+			dev_err(dev->dev, "failed to enable regulator vdd: %d\n", ret);
+			goto fail;
+		}
+	}
+
+	mdp4_kms->clk = devm_clk_get(&pdev->dev, "core_clk");
+	if (IS_ERR(mdp4_kms->clk)) {
+		dev_err(dev->dev, "failed to get core_clk\n");
+		ret = PTR_ERR(mdp4_kms->clk);
+		goto fail;
+	}
+
+	mdp4_kms->pclk = devm_clk_get(&pdev->dev, "iface_clk");
+	if (IS_ERR(mdp4_kms->pclk))
+		mdp4_kms->pclk = NULL;
+
+	// XXX if (rev >= MDP_REV_42) { ???
+	mdp4_kms->lut_clk = devm_clk_get(&pdev->dev, "lut_clk");
+	if (IS_ERR(mdp4_kms->lut_clk)) {
+		dev_err(dev->dev, "failed to get lut_clk\n");
+		ret = PTR_ERR(mdp4_kms->lut_clk);
+		goto fail;
+	}
+
+	clk_set_rate(mdp4_kms->clk, config->max_clk);
+	clk_set_rate(mdp4_kms->lut_clk, config->max_clk);
+
+	if (!config->iommu) {
+		dev_err(dev->dev, "no iommu\n");
+		ret = -ENXIO;
+		goto fail;
+	}
+
+	/* make sure things are off before attaching iommu (bootloader could
+	 * have left things on, in which case we'll start getting faults if
+	 * we don't disable):
+	 */
+	mdp4_write(mdp4_kms, REG_MDP4_DTV_ENABLE, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_ENABLE, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_DSI_ENABLE, 0);
+	mdelay(16);
+
+	ret = msm_iommu_attach(dev, config->iommu,
+			iommu_ports, ARRAY_SIZE(iommu_ports));
+	if (ret)
+		goto fail;
+
+	mdp4_kms->id = msm_register_iommu(dev, config->iommu);
+	if (mdp4_kms->id < 0) {
+		ret = mdp4_kms->id;
+		dev_err(dev->dev, "failed to register mdp4 iommu: %d\n", ret);
+		goto fail;
+	}
+
+	ret = modeset_init(mdp4_kms);
+	if (ret) {
+		dev_err(dev->dev, "modeset_init failed: %d\n", ret);
+		goto fail;
+	}
+
+	return kms;
+
+fail:
+	if (kms)
+		mdp4_destroy(kms);
+	return ERR_PTR(ret);
+}
+
+static struct mdp4_platform_config *mdp4_get_config(struct platform_device *dev)
+{
+	static struct mdp4_platform_config config = {};
+#ifdef CONFIG_OF
+	/* TODO */
+#else
+	if (cpu_is_apq8064())
+		config.max_clk = 266667000;
+	else
+		config.max_clk = 200000000;
+
+	config.iommu = msm_get_iommu_domain(DISPLAY_READ_DOMAIN);
+#endif
+	return &config;
+}
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp4/mdp4_kms.h
new file mode 100644
index 0000000..1e83554
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_kms.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MDP4_KMS_H__
+#define __MDP4_KMS_H__
+
+#include <linux/clk.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+
+#include "msm_drv.h"
+#include "mdp4.xml.h"
+
+
+/* For transiently registering for different MDP4 irqs that various parts
+ * of the KMS code need during setup/configuration.  We these are not
+ * necessarily the same as what drm_vblank_get/put() are requesting, and
+ * the hysteresis in drm_vblank_put() is not necessarily desirable for
+ * internal housekeeping related irq usage.
+ */
+struct mdp4_irq {
+	struct list_head node;
+	uint32_t irqmask;
+	bool registered;
+	void (*irq)(struct mdp4_irq *irq, uint32_t irqstatus);
+};
+
+struct mdp4_kms {
+	struct msm_kms base;
+
+	struct drm_device *dev;
+
+	int rev;
+
+	/* mapper-id used to request GEM buffer mapped for scanout: */
+	int id;
+
+	void __iomem *mmio;
+
+	struct regulator *dsi_pll_vdda;
+	struct regulator *dsi_pll_vddio;
+	struct regulator *vdd;
+
+	struct clk *clk;
+	struct clk *pclk;
+	struct clk *lut_clk;
+
+	/* irq handling: */
+	bool in_irq;
+	struct list_head irq_list;    /* list of mdp4_irq */
+	uint32_t vblank_mask;         /* irq bits set for userspace vblank */
+	struct mdp4_irq error_handler;
+};
+#define to_mdp4_kms(x) container_of(x, struct mdp4_kms, base)
+
+/* platform config data (ie. from DT, or pdata) */
+struct mdp4_platform_config {
+	struct iommu_domain *iommu;
+	uint32_t max_clk;
+};
+
+struct mdp4_format {
+	struct msm_format base;
+	enum mpd4_bpc bpc_r, bpc_g, bpc_b;
+	enum mpd4_bpc_alpha bpc_a;
+	uint8_t unpack[4];
+	bool alpha_enable, unpack_tight;
+	uint8_t cpp, unpack_count;
+};
+#define to_mdp4_format(x) container_of(x, struct mdp4_format, base)
+
+static inline void mdp4_write(struct mdp4_kms *mdp4_kms, u32 reg, u32 data)
+{
+	msm_writel(data, mdp4_kms->mmio + reg);
+}
+
+static inline u32 mdp4_read(struct mdp4_kms *mdp4_kms, u32 reg)
+{
+	return msm_readl(mdp4_kms->mmio + reg);
+}
+
+static inline uint32_t pipe2flush(enum mpd4_pipe pipe)
+{
+	switch (pipe) {
+	case VG1:      return MDP4_OVERLAY_FLUSH_VG1;
+	case VG2:      return MDP4_OVERLAY_FLUSH_VG2;
+	case RGB1:     return MDP4_OVERLAY_FLUSH_RGB1;
+	case RGB2:     return MDP4_OVERLAY_FLUSH_RGB1;
+	default:       return 0;
+	}
+}
+
+static inline uint32_t ovlp2flush(int ovlp)
+{
+	switch (ovlp) {
+	case 0:        return MDP4_OVERLAY_FLUSH_OVLP0;
+	case 1:        return MDP4_OVERLAY_FLUSH_OVLP1;
+	default:       return 0;
+	}
+}
+
+static inline uint32_t dma2irq(enum mdp4_dma dma)
+{
+	switch (dma) {
+	case DMA_P:    return MDP4_IRQ_DMA_P_DONE;
+	case DMA_S:    return MDP4_IRQ_DMA_S_DONE;
+	case DMA_E:    return MDP4_IRQ_DMA_E_DONE;
+	default:       return 0;
+	}
+}
+
+static inline uint32_t dma2err(enum mdp4_dma dma)
+{
+	switch (dma) {
+	case DMA_P:    return MDP4_IRQ_PRIMARY_INTF_UDERRUN;
+	case DMA_S:    return 0;  // ???
+	case DMA_E:    return MDP4_IRQ_EXTERNAL_INTF_UDERRUN;
+	default:       return 0;
+	}
+}
+
+int mdp4_disable(struct mdp4_kms *mdp4_kms);
+int mdp4_enable(struct mdp4_kms *mdp4_kms);
+
+void mdp4_irq_preinstall(struct msm_kms *kms);
+int mdp4_irq_postinstall(struct msm_kms *kms);
+void mdp4_irq_uninstall(struct msm_kms *kms);
+irqreturn_t mdp4_irq(struct msm_kms *kms);
+void mdp4_irq_wait(struct mdp4_kms *mdp4_kms, uint32_t irqmask);
+void mdp4_irq_register(struct mdp4_kms *mdp4_kms, struct mdp4_irq *irq);
+void mdp4_irq_unregister(struct mdp4_kms *mdp4_kms, struct mdp4_irq *irq);
+int mdp4_enable_vblank(struct msm_kms *kms, struct drm_crtc *crtc);
+void mdp4_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc);
+
+const struct msm_format *mdp4_get_format(struct msm_kms *kms, uint32_t format);
+
+void mdp4_plane_install_properties(struct drm_plane *plane,
+		struct drm_mode_object *obj);
+void mdp4_plane_set_scanout(struct drm_plane *plane,
+		struct drm_framebuffer *fb);
+int mdp4_plane_mode_set(struct drm_plane *plane,
+		struct drm_crtc *crtc, struct drm_framebuffer *fb,
+		int crtc_x, int crtc_y,
+		unsigned int crtc_w, unsigned int crtc_h,
+		uint32_t src_x, uint32_t src_y,
+		uint32_t src_w, uint32_t src_h);
+enum mpd4_pipe mdp4_plane_pipe(struct drm_plane *plane);
+struct drm_plane *mdp4_plane_init(struct drm_device *dev,
+		enum mpd4_pipe pipe_id, bool private_plane);
+
+uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc);
+void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc);
+void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config);
+void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf);
+struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
+		struct drm_plane *plane, int id, int ovlp_id,
+		enum mdp4_dma dma_id);
+
+long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate);
+struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev);
+
+#ifdef CONFIG_MSM_BUS_SCALING
+static inline int match_dev_name(struct device *dev, void *data)
+{
+	return !strcmp(dev_name(dev), data);
+}
+/* bus scaling data is associated with extra pointless platform devices,
+ * "dtv", etc.. this is a bit of a hack, but we need a way for encoders
+ * to find their pdata to make the bus-scaling stuff work.
+ */
+static inline void *mdp4_find_pdata(const char *devname)
+{
+	struct device *dev;
+	dev = bus_find_device(&platform_bus_type, NULL,
+			(void *)devname, match_dev_name);
+	return dev ? dev->platform_data : NULL;
+}
+#endif
+
+#endif /* __MDP4_KMS_H__ */
diff --git a/drivers/gpu/drm/msm/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp4/mdp4_plane.c
new file mode 100644
index 0000000..3468229
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp4/mdp4_plane.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mdp4_kms.h"
+
+
+struct mdp4_plane {
+	struct drm_plane base;
+	const char *name;
+
+	enum mpd4_pipe pipe;
+
+	uint32_t nformats;
+	uint32_t formats[32];
+
+	bool enabled;
+};
+#define to_mdp4_plane(x) container_of(x, struct mdp4_plane, base)
+
+static struct mdp4_kms *get_kms(struct drm_plane *plane)
+{
+	struct msm_drm_private *priv = plane->dev->dev_private;
+	return to_mdp4_kms(priv->kms);
+}
+
+static int mdp4_plane_update(struct drm_plane *plane,
+		struct drm_crtc *crtc, struct drm_framebuffer *fb,
+		int crtc_x, int crtc_y,
+		unsigned int crtc_w, unsigned int crtc_h,
+		uint32_t src_x, uint32_t src_y,
+		uint32_t src_w, uint32_t src_h)
+{
+	struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane);
+
+	mdp4_plane->enabled = true;
+
+	if (plane->fb)
+		drm_framebuffer_unreference(plane->fb);
+
+	drm_framebuffer_reference(fb);
+
+	return mdp4_plane_mode_set(plane, crtc, fb,
+			crtc_x, crtc_y, crtc_w, crtc_h,
+			src_x, src_y, src_w, src_h);
+}
+
+static int mdp4_plane_disable(struct drm_plane *plane)
+{
+	struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane);
+	DBG("%s: TODO", mdp4_plane->name); // XXX
+	return 0;
+}
+
+static void mdp4_plane_destroy(struct drm_plane *plane)
+{
+	struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane);
+
+	mdp4_plane_disable(plane);
+	drm_plane_cleanup(plane);
+
+	kfree(mdp4_plane);
+}
+
+/* helper to install properties which are common to planes and crtcs */
+void mdp4_plane_install_properties(struct drm_plane *plane,
+		struct drm_mode_object *obj)
+{
+	// XXX
+}
+
+int mdp4_plane_set_property(struct drm_plane *plane,
+		struct drm_property *property, uint64_t val)
+{
+	// XXX
+	return -EINVAL;
+}
+
+static const struct drm_plane_funcs mdp4_plane_funcs = {
+		.update_plane = mdp4_plane_update,
+		.disable_plane = mdp4_plane_disable,
+		.destroy = mdp4_plane_destroy,
+		.set_property = mdp4_plane_set_property,
+};
+
+void mdp4_plane_set_scanout(struct drm_plane *plane,
+		struct drm_framebuffer *fb)
+{
+	struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane);
+	struct mdp4_kms *mdp4_kms = get_kms(plane);
+	enum mpd4_pipe pipe = mdp4_plane->pipe;
+	uint32_t iova;
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRC_STRIDE_A(pipe),
+			MDP4_PIPE_SRC_STRIDE_A_P0(fb->pitches[0]) |
+			MDP4_PIPE_SRC_STRIDE_A_P1(fb->pitches[1]));
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRC_STRIDE_B(pipe),
+			MDP4_PIPE_SRC_STRIDE_B_P2(fb->pitches[2]) |
+			MDP4_PIPE_SRC_STRIDE_B_P3(fb->pitches[3]));
+
+	msm_gem_get_iova(msm_framebuffer_bo(fb, 0), mdp4_kms->id, &iova);
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRCP0_BASE(pipe), iova);
+
+	plane->fb = fb;
+}
+
+#define MDP4_VG_PHASE_STEP_DEFAULT	0x20000000
+
+int mdp4_plane_mode_set(struct drm_plane *plane,
+		struct drm_crtc *crtc, struct drm_framebuffer *fb,
+		int crtc_x, int crtc_y,
+		unsigned int crtc_w, unsigned int crtc_h,
+		uint32_t src_x, uint32_t src_y,
+		uint32_t src_w, uint32_t src_h)
+{
+	struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane);
+	struct mdp4_kms *mdp4_kms = get_kms(plane);
+	enum mpd4_pipe pipe = mdp4_plane->pipe;
+	const struct mdp4_format *format;
+	uint32_t op_mode = 0;
+	uint32_t phasex_step = MDP4_VG_PHASE_STEP_DEFAULT;
+	uint32_t phasey_step = MDP4_VG_PHASE_STEP_DEFAULT;
+
+	/* src values are in Q16 fixed point, convert to integer: */
+	src_x = src_x >> 16;
+	src_y = src_y >> 16;
+	src_w = src_w >> 16;
+	src_h = src_h >> 16;
+
+	if (src_w != crtc_w) {
+		op_mode |= MDP4_PIPE_OP_MODE_SCALEX_EN;
+		/* TODO calc phasex_step */
+	}
+
+	if (src_h != crtc_h) {
+		op_mode |= MDP4_PIPE_OP_MODE_SCALEY_EN;
+		/* TODO calc phasey_step */
+	}
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRC_SIZE(pipe),
+			MDP4_PIPE_SRC_SIZE_WIDTH(src_w) |
+			MDP4_PIPE_SRC_SIZE_HEIGHT(src_h));
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRC_XY(pipe),
+			MDP4_PIPE_SRC_XY_X(src_x) |
+			MDP4_PIPE_SRC_XY_Y(src_y));
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_DST_SIZE(pipe),
+			MDP4_PIPE_DST_SIZE_WIDTH(crtc_w) |
+			MDP4_PIPE_DST_SIZE_HEIGHT(crtc_h));
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_DST_XY(pipe),
+			MDP4_PIPE_SRC_XY_X(crtc_x) |
+			MDP4_PIPE_SRC_XY_Y(crtc_y));
+
+	mdp4_plane_set_scanout(plane, fb);
+
+	format = to_mdp4_format(msm_framebuffer_format(fb));
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRC_FORMAT(pipe),
+			MDP4_PIPE_SRC_FORMAT_A_BPC(format->bpc_a) |
+			MDP4_PIPE_SRC_FORMAT_R_BPC(format->bpc_r) |
+			MDP4_PIPE_SRC_FORMAT_G_BPC(format->bpc_g) |
+			MDP4_PIPE_SRC_FORMAT_B_BPC(format->bpc_b) |
+			COND(format->alpha_enable, MDP4_PIPE_SRC_FORMAT_ALPHA_ENABLE) |
+			MDP4_PIPE_SRC_FORMAT_CPP(format->cpp - 1) |
+			MDP4_PIPE_SRC_FORMAT_UNPACK_COUNT(format->unpack_count - 1) |
+			COND(format->unpack_tight, MDP4_PIPE_SRC_FORMAT_UNPACK_TIGHT));
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_SRC_UNPACK(pipe),
+			MDP4_PIPE_SRC_UNPACK_ELEM0(format->unpack[0]) |
+			MDP4_PIPE_SRC_UNPACK_ELEM1(format->unpack[1]) |
+			MDP4_PIPE_SRC_UNPACK_ELEM2(format->unpack[2]) |
+			MDP4_PIPE_SRC_UNPACK_ELEM3(format->unpack[3]));
+
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_OP_MODE(pipe), op_mode);
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_PHASEX_STEP(pipe), phasex_step);
+	mdp4_write(mdp4_kms, REG_MDP4_PIPE_PHASEY_STEP(pipe), phasey_step);
+
+	plane->crtc = crtc;
+
+	return 0;
+}
+
+static const char *pipe_names[] = {
+		"VG1", "VG2",
+		"RGB1", "RGB2", "RGB3",
+		"VG3", "VG4",
+};
+
+enum mpd4_pipe mdp4_plane_pipe(struct drm_plane *plane)
+{
+	struct mdp4_plane *mdp4_plane = to_mdp4_plane(plane);
+	return mdp4_plane->pipe;
+}
+
+/* initialize plane */
+struct drm_plane *mdp4_plane_init(struct drm_device *dev,
+		enum mpd4_pipe pipe_id, bool private_plane)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_plane *plane = NULL;
+	struct mdp4_plane *mdp4_plane;
+	int ret;
+
+	mdp4_plane = kzalloc(sizeof(*mdp4_plane), GFP_KERNEL);
+	if (!mdp4_plane) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	plane = &mdp4_plane->base;
+
+	mdp4_plane->pipe = pipe_id;
+	mdp4_plane->name = pipe_names[pipe_id];
+
+	drm_plane_init(dev, plane, (1 << priv->num_crtcs) - 1, &mdp4_plane_funcs,
+			mdp4_plane->formats, mdp4_plane->nformats, private_plane);
+
+	mdp4_plane_install_properties(plane, &plane->base);
+
+	return plane;
+
+fail:
+	if (plane)
+		mdp4_plane_destroy(plane);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
new file mode 100644
index 0000000..864c977
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -0,0 +1,776 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_drv.h"
+#include "msm_gpu.h"
+
+#include <mach/iommu.h>
+
+static void msm_fb_output_poll_changed(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	if (priv->fbdev)
+		drm_fb_helper_hotplug_event(priv->fbdev);
+}
+
+static const struct drm_mode_config_funcs mode_config_funcs = {
+	.fb_create = msm_framebuffer_create,
+	.output_poll_changed = msm_fb_output_poll_changed,
+};
+
+static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
+		unsigned long iova, int flags, void *arg)
+{
+	DBG("*** fault: iova=%08lx, flags=%d", iova, flags);
+	return 0;
+}
+
+int msm_register_iommu(struct drm_device *dev, struct iommu_domain *iommu)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	int idx = priv->num_iommus++;
+
+	if (WARN_ON(idx >= ARRAY_SIZE(priv->iommus)))
+		return -EINVAL;
+
+	priv->iommus[idx] = iommu;
+
+	iommu_set_fault_handler(iommu, msm_fault_handler, dev);
+
+	/* need to iommu_attach_device() somewhere??  on resume?? */
+
+	return idx;
+}
+
+int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu,
+		const char **names, int cnt)
+{
+	int i, ret;
+
+	for (i = 0; i < cnt; i++) {
+		struct device *ctx = msm_iommu_get_ctx(names[i]);
+		if (!ctx)
+			continue;
+		ret = iommu_attach_device(iommu, ctx);
+		if (ret) {
+			dev_warn(dev->dev, "could not attach iommu to %s", names[i]);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+#ifdef CONFIG_DRM_MSM_REGISTER_LOGGING
+static bool reglog = false;
+MODULE_PARM_DESC(reglog, "Enable register read/write logging");
+module_param(reglog, bool, 0600);
+#else
+#define reglog 0
+#endif
+
+void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
+		const char *dbgname)
+{
+	struct resource *res;
+	unsigned long size;
+	void __iomem *ptr;
+
+	if (name)
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+	else
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	if (!res) {
+		dev_err(&pdev->dev, "failed to get memory resource: %s\n", name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	size = resource_size(res);
+
+	ptr = devm_ioremap_nocache(&pdev->dev, res->start, size);
+	if (!ptr) {
+		dev_err(&pdev->dev, "failed to ioremap: %s\n", name);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	if (reglog)
+		printk(KERN_DEBUG "IO:region %s %08x %08lx\n", dbgname, (u32)ptr, size);
+
+	return ptr;
+}
+
+void msm_writel(u32 data, void __iomem *addr)
+{
+	if (reglog)
+		printk(KERN_DEBUG "IO:W %08x %08x\n", (u32)addr, data);
+	writel(data, addr);
+}
+
+u32 msm_readl(const void __iomem *addr)
+{
+	u32 val = readl(addr);
+	if (reglog)
+		printk(KERN_ERR "IO:R %08x %08x\n", (u32)addr, val);
+	return val;
+}
+
+/*
+ * DRM operations:
+ */
+
+static int msm_unload(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	struct msm_gpu *gpu = priv->gpu;
+
+	drm_kms_helper_poll_fini(dev);
+	drm_mode_config_cleanup(dev);
+	drm_vblank_cleanup(dev);
+
+	pm_runtime_get_sync(dev->dev);
+	drm_irq_uninstall(dev);
+	pm_runtime_put_sync(dev->dev);
+
+	flush_workqueue(priv->wq);
+	destroy_workqueue(priv->wq);
+
+	if (kms) {
+		pm_runtime_disable(dev->dev);
+		kms->funcs->destroy(kms);
+	}
+
+	if (gpu) {
+		mutex_lock(&dev->struct_mutex);
+		gpu->funcs->pm_suspend(gpu);
+		gpu->funcs->destroy(gpu);
+		mutex_unlock(&dev->struct_mutex);
+	}
+
+	dev->dev_private = NULL;
+
+	kfree(priv);
+
+	return 0;
+}
+
+static int msm_load(struct drm_device *dev, unsigned long flags)
+{
+	struct platform_device *pdev = dev->platformdev;
+	struct msm_drm_private *priv;
+	struct msm_kms *kms;
+	int ret;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		dev_err(dev->dev, "failed to allocate private data\n");
+		return -ENOMEM;
+	}
+
+	dev->dev_private = priv;
+
+	priv->wq = alloc_ordered_workqueue("msm", 0);
+	init_waitqueue_head(&priv->fence_event);
+
+	INIT_LIST_HEAD(&priv->inactive_list);
+
+	drm_mode_config_init(dev);
+
+	kms = mdp4_kms_init(dev);
+	if (IS_ERR(kms)) {
+		/*
+		 * NOTE: once we have GPU support, having no kms should not
+		 * be considered fatal.. ideally we would still support gpu
+		 * and (for example) use dmabuf/prime to share buffers with
+		 * imx drm driver on iMX5
+		 */
+		dev_err(dev->dev, "failed to load kms\n");
+		ret = PTR_ERR(priv->kms);
+		goto fail;
+	}
+
+	priv->kms = kms;
+
+	if (kms) {
+		pm_runtime_enable(dev->dev);
+		ret = kms->funcs->hw_init(kms);
+		if (ret) {
+			dev_err(dev->dev, "kms hw init failed: %d\n", ret);
+			goto fail;
+		}
+	}
+
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+	dev->mode_config.max_width = 2048;
+	dev->mode_config.max_height = 2048;
+	dev->mode_config.funcs = &mode_config_funcs;
+
+	ret = drm_vblank_init(dev, 1);
+	if (ret < 0) {
+		dev_err(dev->dev, "failed to initialize vblank\n");
+		goto fail;
+	}
+
+	pm_runtime_get_sync(dev->dev);
+	ret = drm_irq_install(dev);
+	pm_runtime_put_sync(dev->dev);
+	if (ret < 0) {
+		dev_err(dev->dev, "failed to install IRQ handler\n");
+		goto fail;
+	}
+
+	platform_set_drvdata(pdev, dev);
+
+#ifdef CONFIG_DRM_MSM_FBDEV
+	priv->fbdev = msm_fbdev_init(dev);
+#endif
+
+	drm_kms_helper_poll_init(dev);
+
+	return 0;
+
+fail:
+	msm_unload(dev);
+	return ret;
+}
+
+static void load_gpu(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_gpu *gpu;
+
+	if (priv->gpu)
+		return;
+
+	mutex_lock(&dev->struct_mutex);
+	gpu = a3xx_gpu_init(dev);
+	if (IS_ERR(gpu)) {
+		dev_warn(dev->dev, "failed to load a3xx gpu\n");
+		gpu = NULL;
+		/* not fatal */
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	if (gpu) {
+		int ret;
+		gpu->funcs->pm_resume(gpu);
+		ret = gpu->funcs->hw_init(gpu);
+		if (ret) {
+			dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
+			gpu->funcs->destroy(gpu);
+			gpu = NULL;
+		}
+	}
+
+	priv->gpu = gpu;
+}
+
+static int msm_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct msm_file_private *ctx;
+
+	/* For now, load gpu on open.. to avoid the requirement of having
+	 * firmware in the initrd.
+	 */
+	load_gpu(dev);
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	file->driver_priv = ctx;
+
+	return 0;
+}
+
+static void msm_preclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_file_private *ctx = file->driver_priv;
+	struct msm_kms *kms = priv->kms;
+
+	if (kms)
+		kms->funcs->preclose(kms, file);
+
+	mutex_lock(&dev->struct_mutex);
+	if (ctx == priv->lastctx)
+		priv->lastctx = NULL;
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(ctx);
+}
+
+static void msm_lastclose(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	if (priv->fbdev) {
+		drm_modeset_lock_all(dev);
+		drm_fb_helper_restore_fbdev_mode(priv->fbdev);
+		drm_modeset_unlock_all(dev);
+	}
+}
+
+static irqreturn_t msm_irq(DRM_IRQ_ARGS)
+{
+	struct drm_device *dev = arg;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	BUG_ON(!kms);
+	return kms->funcs->irq(kms);
+}
+
+static void msm_irq_preinstall(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	BUG_ON(!kms);
+	kms->funcs->irq_preinstall(kms);
+}
+
+static int msm_irq_postinstall(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	BUG_ON(!kms);
+	return kms->funcs->irq_postinstall(kms);
+}
+
+static void msm_irq_uninstall(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	BUG_ON(!kms);
+	kms->funcs->irq_uninstall(kms);
+}
+
+static int msm_enable_vblank(struct drm_device *dev, int crtc_id)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	if (!kms)
+		return -ENXIO;
+	DBG("dev=%p, crtc=%d", dev, crtc_id);
+	return kms->funcs->enable_vblank(kms, priv->crtcs[crtc_id]);
+}
+
+static void msm_disable_vblank(struct drm_device *dev, int crtc_id)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	if (!kms)
+		return;
+	DBG("dev=%p, crtc=%d", dev, crtc_id);
+	kms->funcs->disable_vblank(kms, priv->crtcs[crtc_id]);
+}
+
+/*
+ * DRM debugfs:
+ */
+
+#ifdef CONFIG_DEBUG_FS
+static int msm_gpu_show(struct drm_device *dev, struct seq_file *m)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_gpu *gpu = priv->gpu;
+
+	if (gpu) {
+		seq_printf(m, "%s Status:\n", gpu->name);
+		gpu->funcs->show(gpu, m);
+	}
+
+	return 0;
+}
+
+static int msm_gem_show(struct drm_device *dev, struct seq_file *m)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_gpu *gpu = priv->gpu;
+
+	if (gpu) {
+		seq_printf(m, "Active Objects (%s):\n", gpu->name);
+		msm_gem_describe_objects(&gpu->active_list, m);
+	}
+
+	seq_printf(m, "Inactive Objects:\n");
+	msm_gem_describe_objects(&priv->inactive_list, m);
+
+	return 0;
+}
+
+static int msm_mm_show(struct drm_device *dev, struct seq_file *m)
+{
+	return drm_mm_dump_table(m, dev->mm_private);
+}
+
+static int msm_fb_show(struct drm_device *dev, struct seq_file *m)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_framebuffer *fb, *fbdev_fb = NULL;
+
+	if (priv->fbdev) {
+		seq_printf(m, "fbcon ");
+		fbdev_fb = priv->fbdev->fb;
+		msm_framebuffer_describe(fbdev_fb, m);
+	}
+
+	mutex_lock(&dev->mode_config.fb_lock);
+	list_for_each_entry(fb, &dev->mode_config.fb_list, head) {
+		if (fb == fbdev_fb)
+			continue;
+
+		seq_printf(m, "user ");
+		msm_framebuffer_describe(fb, m);
+	}
+	mutex_unlock(&dev->mode_config.fb_lock);
+
+	return 0;
+}
+
+static int show_locked(struct seq_file *m, void *arg)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	int (*show)(struct drm_device *dev, struct seq_file *m) =
+			node->info_ent->data;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	ret = show(dev, m);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+
+static struct drm_info_list msm_debugfs_list[] = {
+		{"gpu", show_locked, 0, msm_gpu_show},
+		{"gem", show_locked, 0, msm_gem_show},
+		{ "mm", show_locked, 0, msm_mm_show },
+		{ "fb", show_locked, 0, msm_fb_show },
+};
+
+static int msm_debugfs_init(struct drm_minor *minor)
+{
+	struct drm_device *dev = minor->dev;
+	int ret;
+
+	ret = drm_debugfs_create_files(msm_debugfs_list,
+			ARRAY_SIZE(msm_debugfs_list),
+			minor->debugfs_root, minor);
+
+	if (ret) {
+		dev_err(dev->dev, "could not install msm_debugfs_list\n");
+		return ret;
+	}
+
+	return ret;
+}
+
+static void msm_debugfs_cleanup(struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(msm_debugfs_list,
+			ARRAY_SIZE(msm_debugfs_list), minor);
+}
+#endif
+
+/*
+ * Fences:
+ */
+
+int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence,
+		struct timespec *timeout)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	unsigned long timeout_jiffies = timespec_to_jiffies(timeout);
+	unsigned long start_jiffies = jiffies;
+	unsigned long remaining_jiffies;
+	int ret;
+
+	if (time_after(start_jiffies, timeout_jiffies))
+		remaining_jiffies = 0;
+	else
+		remaining_jiffies = timeout_jiffies - start_jiffies;
+
+	ret = wait_event_interruptible_timeout(priv->fence_event,
+			priv->completed_fence >= fence,
+			remaining_jiffies);
+	if (ret == 0) {
+		DBG("timeout waiting for fence: %u (completed: %u)",
+				fence, priv->completed_fence);
+		ret = -ETIMEDOUT;
+	} else if (ret != -ERESTARTSYS) {
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/* call under struct_mutex */
+void msm_update_fence(struct drm_device *dev, uint32_t fence)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+
+	if (fence > priv->completed_fence) {
+		priv->completed_fence = fence;
+		wake_up_all(&priv->fence_event);
+	}
+}
+
+/*
+ * DRM ioctls:
+ */
+
+static int msm_ioctl_get_param(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_msm_param *args = data;
+	struct msm_gpu *gpu;
+
+	/* for now, we just have 3d pipe.. eventually this would need to
+	 * be more clever to dispatch to appropriate gpu module:
+	 */
+	if (args->pipe != MSM_PIPE_3D0)
+		return -EINVAL;
+
+	gpu = priv->gpu;
+
+	if (!gpu)
+		return -ENXIO;
+
+	return gpu->funcs->get_param(gpu, args->param, &args->value);
+}
+
+static int msm_ioctl_gem_new(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_msm_gem_new *args = data;
+	return msm_gem_new_handle(dev, file, args->size,
+			args->flags, &args->handle);
+}
+
+#define TS(t) ((struct timespec){ .tv_sec = (t).tv_sec, .tv_nsec = (t).tv_nsec })
+
+static int msm_ioctl_gem_cpu_prep(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_msm_gem_cpu_prep *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	obj = drm_gem_object_lookup(dev, file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = msm_gem_cpu_prep(obj, args->op, &TS(args->timeout));
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+static int msm_ioctl_gem_cpu_fini(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_msm_gem_cpu_fini *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	obj = drm_gem_object_lookup(dev, file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = msm_gem_cpu_fini(obj);
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_msm_gem_info *args = data;
+	struct drm_gem_object *obj;
+	int ret = 0;
+
+	if (args->pad)
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(dev, file, args->handle);
+	if (!obj)
+		return -ENOENT;
+
+	args->offset = msm_gem_mmap_offset(obj);
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_msm_wait_fence *args = data;
+	return msm_wait_fence_interruptable(dev, args->fence, &TS(args->timeout));
+}
+
+static const struct drm_ioctl_desc msm_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(MSM_GET_PARAM,    msm_ioctl_get_param,    DRM_UNLOCKED|DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MSM_GEM_NEW,      msm_ioctl_gem_new,      DRM_UNLOCKED|DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MSM_GEM_INFO,     msm_ioctl_gem_info,     DRM_UNLOCKED|DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MSM_GEM_CPU_PREP, msm_ioctl_gem_cpu_prep, DRM_UNLOCKED|DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MSM_GEM_CPU_FINI, msm_ioctl_gem_cpu_fini, DRM_UNLOCKED|DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MSM_GEM_SUBMIT,   msm_ioctl_gem_submit,   DRM_UNLOCKED|DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(MSM_WAIT_FENCE,   msm_ioctl_wait_fence,   DRM_UNLOCKED|DRM_AUTH),
+};
+
+static const struct vm_operations_struct vm_ops = {
+	.fault = msm_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+static const struct file_operations fops = {
+	.owner              = THIS_MODULE,
+	.open               = drm_open,
+	.release            = drm_release,
+	.unlocked_ioctl     = drm_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl       = drm_compat_ioctl,
+#endif
+	.poll               = drm_poll,
+	.read               = drm_read,
+	.llseek             = no_llseek,
+	.mmap               = msm_gem_mmap,
+};
+
+static struct drm_driver msm_driver = {
+	.driver_features    = DRIVER_HAVE_IRQ | DRIVER_GEM | DRIVER_MODESET,
+	.load               = msm_load,
+	.unload             = msm_unload,
+	.open               = msm_open,
+	.preclose           = msm_preclose,
+	.lastclose          = msm_lastclose,
+	.irq_handler        = msm_irq,
+	.irq_preinstall     = msm_irq_preinstall,
+	.irq_postinstall    = msm_irq_postinstall,
+	.irq_uninstall      = msm_irq_uninstall,
+	.get_vblank_counter = drm_vblank_count,
+	.enable_vblank      = msm_enable_vblank,
+	.disable_vblank     = msm_disable_vblank,
+	.gem_free_object    = msm_gem_free_object,
+	.gem_vm_ops         = &vm_ops,
+	.dumb_create        = msm_gem_dumb_create,
+	.dumb_map_offset    = msm_gem_dumb_map_offset,
+	.dumb_destroy       = msm_gem_dumb_destroy,
+#ifdef CONFIG_DEBUG_FS
+	.debugfs_init       = msm_debugfs_init,
+	.debugfs_cleanup    = msm_debugfs_cleanup,
+#endif
+	.ioctls             = msm_ioctls,
+	.num_ioctls         = DRM_MSM_NUM_IOCTLS,
+	.fops               = &fops,
+	.name               = "msm",
+	.desc               = "MSM Snapdragon DRM",
+	.date               = "20130625",
+	.major              = 1,
+	.minor              = 0,
+};
+
+#ifdef CONFIG_PM_SLEEP
+static int msm_pm_suspend(struct device *dev)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+
+	drm_kms_helper_poll_disable(ddev);
+
+	return 0;
+}
+
+static int msm_pm_resume(struct device *dev)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+
+	drm_kms_helper_poll_enable(ddev);
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops msm_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(msm_pm_suspend, msm_pm_resume)
+};
+
+/*
+ * Platform driver:
+ */
+
+static int msm_pdev_probe(struct platform_device *pdev)
+{
+	return drm_platform_init(&msm_driver, pdev);
+}
+
+static int msm_pdev_remove(struct platform_device *pdev)
+{
+	drm_platform_exit(&msm_driver, pdev);
+
+	return 0;
+}
+
+static const struct platform_device_id msm_id[] = {
+	{ "mdp", 0 },
+	{ }
+};
+
+static struct platform_driver msm_platform_driver = {
+	.probe      = msm_pdev_probe,
+	.remove     = msm_pdev_remove,
+	.driver     = {
+		.owner  = THIS_MODULE,
+		.name   = "msm",
+		.pm     = &msm_pm_ops,
+	},
+	.id_table   = msm_id,
+};
+
+static int __init msm_drm_register(void)
+{
+	DBG("init");
+	hdmi_register();
+	a3xx_register();
+	return platform_driver_register(&msm_platform_driver);
+}
+
+static void __exit msm_drm_unregister(void)
+{
+	DBG("fini");
+	platform_driver_unregister(&msm_platform_driver);
+	hdmi_unregister();
+	a3xx_unregister();
+}
+
+module_init(msm_drm_register);
+module_exit(msm_drm_unregister);
+
+MODULE_AUTHOR("Rob Clark <robdclark@gmail.com");
+MODULE_DESCRIPTION("MSM DRM Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
new file mode 100644
index 0000000..80d7509
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MSM_DRV_H__
+#define __MSM_DRV_H__
+
+#include <linux/kernel.h>
+#include <linux/clk.h>
+#include <linux/cpufreq.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/iommu.h>
+#include <linux/types.h>
+#include <asm/sizes.h>
+
+#ifndef CONFIG_OF
+#include <mach/board.h>
+#include <mach/socinfo.h>
+#include <mach/iommu_domains.h>
+#endif
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/msm_drm.h>
+
+struct msm_kms;
+struct msm_gpu;
+
+#define NUM_DOMAINS 2    /* one for KMS, then one per gpu core (?) */
+
+struct msm_file_private {
+	/* currently we don't do anything useful with this.. but when
+	 * per-context address spaces are supported we'd keep track of
+	 * the context's page-tables here.
+	 */
+	int dummy;
+};
+
+struct msm_drm_private {
+
+	struct msm_kms *kms;
+
+	/* when we have more than one 'msm_gpu' these need to be an array: */
+	struct msm_gpu *gpu;
+	struct msm_file_private *lastctx;
+
+	struct drm_fb_helper *fbdev;
+
+	uint32_t next_fence, completed_fence;
+	wait_queue_head_t fence_event;
+
+	/* list of GEM objects: */
+	struct list_head inactive_list;
+
+	struct workqueue_struct *wq;
+
+	/* registered IOMMU domains: */
+	unsigned int num_iommus;
+	struct iommu_domain *iommus[NUM_DOMAINS];
+
+	unsigned int num_crtcs;
+	struct drm_crtc *crtcs[8];
+
+	unsigned int num_encoders;
+	struct drm_encoder *encoders[8];
+
+	unsigned int num_bridges;
+	struct drm_bridge *bridges[8];
+
+	unsigned int num_connectors;
+	struct drm_connector *connectors[8];
+};
+
+struct msm_format {
+	uint32_t pixel_format;
+};
+
+/* As there are different display controller blocks depending on the
+ * snapdragon version, the kms support is split out and the appropriate
+ * implementation is loaded at runtime.  The kms module is responsible
+ * for constructing the appropriate planes/crtcs/encoders/connectors.
+ */
+struct msm_kms_funcs {
+	/* hw initialization: */
+	int (*hw_init)(struct msm_kms *kms);
+	/* irq handling: */
+	void (*irq_preinstall)(struct msm_kms *kms);
+	int (*irq_postinstall)(struct msm_kms *kms);
+	void (*irq_uninstall)(struct msm_kms *kms);
+	irqreturn_t (*irq)(struct msm_kms *kms);
+	int (*enable_vblank)(struct msm_kms *kms, struct drm_crtc *crtc);
+	void (*disable_vblank)(struct msm_kms *kms, struct drm_crtc *crtc);
+	/* misc: */
+	const struct msm_format *(*get_format)(struct msm_kms *kms, uint32_t format);
+	long (*round_pixclk)(struct msm_kms *kms, unsigned long rate,
+			struct drm_encoder *encoder);
+	/* cleanup: */
+	void (*preclose)(struct msm_kms *kms, struct drm_file *file);
+	void (*destroy)(struct msm_kms *kms);
+};
+
+struct msm_kms {
+	const struct msm_kms_funcs *funcs;
+};
+
+struct msm_kms *mdp4_kms_init(struct drm_device *dev);
+
+int msm_register_iommu(struct drm_device *dev, struct iommu_domain *iommu);
+int msm_iommu_attach(struct drm_device *dev, struct iommu_domain *iommu,
+		const char **names, int cnt);
+
+int msm_wait_fence_interruptable(struct drm_device *dev, uint32_t fence,
+		struct timespec *timeout);
+void msm_update_fence(struct drm_device *dev, uint32_t fence);
+
+int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
+		struct drm_file *file);
+
+int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj);
+int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id,
+		uint32_t *iova);
+int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint32_t *iova);
+void msm_gem_put_iova(struct drm_gem_object *obj, int id);
+int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
+		struct drm_mode_create_dumb *args);
+int msm_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
+		uint32_t handle);
+int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
+		uint32_t handle, uint64_t *offset);
+void *msm_gem_vaddr_locked(struct drm_gem_object *obj);
+void *msm_gem_vaddr(struct drm_gem_object *obj);
+int msm_gem_queue_inactive_work(struct drm_gem_object *obj,
+		struct work_struct *work);
+void msm_gem_move_to_active(struct drm_gem_object *obj,
+		struct msm_gpu *gpu, uint32_t fence);
+void msm_gem_move_to_inactive(struct drm_gem_object *obj);
+int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op,
+		struct timespec *timeout);
+int msm_gem_cpu_fini(struct drm_gem_object *obj);
+void msm_gem_free_object(struct drm_gem_object *obj);
+int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
+		uint32_t size, uint32_t flags, uint32_t *handle);
+struct drm_gem_object *msm_gem_new(struct drm_device *dev,
+		uint32_t size, uint32_t flags);
+
+struct drm_gem_object *msm_framebuffer_bo(struct drm_framebuffer *fb, int plane);
+const struct msm_format *msm_framebuffer_format(struct drm_framebuffer *fb);
+struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
+		struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
+struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev,
+		struct drm_file *file, struct drm_mode_fb_cmd2 *mode_cmd);
+
+struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev);
+
+int hdmi_init(struct drm_device *dev, struct drm_encoder *encoder);
+void __init hdmi_register(void);
+void __exit hdmi_unregister(void);
+
+#ifdef CONFIG_DEBUG_FS
+void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m);
+void msm_gem_describe_objects(struct list_head *list, struct seq_file *m);
+void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m);
+#endif
+
+void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
+		const char *dbgname);
+void msm_writel(u32 data, void __iomem *addr);
+u32 msm_readl(const void __iomem *addr);
+
+#define DBG(fmt, ...) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
+#define VERB(fmt, ...) if (0) DRM_DEBUG(fmt"\n", ##__VA_ARGS__)
+
+static inline int align_pitch(int width, int bpp)
+{
+	int bytespp = (bpp + 7) / 8;
+	/* adreno needs pitch aligned to 32 pixels: */
+	return bytespp * ALIGN(width, 32);
+}
+
+/* for the generated headers: */
+#define INVALID_IDX(idx) ({BUG(); 0;})
+#define fui(x)                ({BUG(); 0;})
+#define util_float_to_half(x) ({BUG(); 0;})
+
+
+#define FIELD(val, name) (((val) & name ## __MASK) >> name ## __SHIFT)
+
+/* for conditionally setting boolean flag(s): */
+#define COND(bool, val) ((bool) ? (val) : 0)
+
+
+#endif /* __MSM_DRV_H__ */
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
new file mode 100644
index 0000000..0286c0e
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_drv.h"
+
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+struct msm_framebuffer {
+	struct drm_framebuffer base;
+	const struct msm_format *format;
+	struct drm_gem_object *planes[2];
+};
+#define to_msm_framebuffer(x) container_of(x, struct msm_framebuffer, base)
+
+
+static int msm_framebuffer_create_handle(struct drm_framebuffer *fb,
+		struct drm_file *file_priv,
+		unsigned int *handle)
+{
+	struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb);
+	return drm_gem_handle_create(file_priv,
+			msm_fb->planes[0], handle);
+}
+
+static void msm_framebuffer_destroy(struct drm_framebuffer *fb)
+{
+	struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb);
+	int i, n = drm_format_num_planes(fb->pixel_format);
+
+	DBG("destroy: FB ID: %d (%p)", fb->base.id, fb);
+
+	drm_framebuffer_cleanup(fb);
+
+	for (i = 0; i < n; i++) {
+		struct drm_gem_object *bo = msm_fb->planes[i];
+		if (bo)
+			drm_gem_object_unreference_unlocked(bo);
+	}
+
+	kfree(msm_fb);
+}
+
+static int msm_framebuffer_dirty(struct drm_framebuffer *fb,
+		struct drm_file *file_priv, unsigned flags, unsigned color,
+		struct drm_clip_rect *clips, unsigned num_clips)
+{
+	return 0;
+}
+
+static const struct drm_framebuffer_funcs msm_framebuffer_funcs = {
+	.create_handle = msm_framebuffer_create_handle,
+	.destroy = msm_framebuffer_destroy,
+	.dirty = msm_framebuffer_dirty,
+};
+
+#ifdef CONFIG_DEBUG_FS
+void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m)
+{
+	struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb);
+	int i, n = drm_format_num_planes(fb->pixel_format);
+
+	seq_printf(m, "fb: %dx%d@%4.4s (%2d, ID:%d)\n",
+			fb->width, fb->height, (char *)&fb->pixel_format,
+			fb->refcount.refcount.counter, fb->base.id);
+
+	for (i = 0; i < n; i++) {
+		seq_printf(m, "   %d: offset=%d pitch=%d, obj: ",
+				i, fb->offsets[i], fb->pitches[i]);
+		msm_gem_describe(msm_fb->planes[i], m);
+	}
+}
+#endif
+
+struct drm_gem_object *msm_framebuffer_bo(struct drm_framebuffer *fb, int plane)
+{
+	struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb);
+	return msm_fb->planes[plane];
+}
+
+const struct msm_format *msm_framebuffer_format(struct drm_framebuffer *fb)
+{
+	struct msm_framebuffer *msm_fb = to_msm_framebuffer(fb);
+	return msm_fb->format;
+}
+
+struct drm_framebuffer *msm_framebuffer_create(struct drm_device *dev,
+		struct drm_file *file, struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_gem_object *bos[4] = {0};
+	struct drm_framebuffer *fb;
+	int ret, i, n = drm_format_num_planes(mode_cmd->pixel_format);
+
+	for (i = 0; i < n; i++) {
+		bos[i] = drm_gem_object_lookup(dev, file,
+				mode_cmd->handles[i]);
+		if (!bos[i]) {
+			ret = -ENXIO;
+			goto out_unref;
+		}
+	}
+
+	fb = msm_framebuffer_init(dev, mode_cmd, bos);
+	if (IS_ERR(fb)) {
+		ret = PTR_ERR(fb);
+		goto out_unref;
+	}
+
+	return fb;
+
+out_unref:
+	for (i = 0; i < n; i++)
+		drm_gem_object_unreference_unlocked(bos[i]);
+	return ERR_PTR(ret);
+}
+
+struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
+		struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_kms *kms = priv->kms;
+	struct msm_framebuffer *msm_fb;
+	struct drm_framebuffer *fb = NULL;
+	const struct msm_format *format;
+	int ret, i, n;
+	unsigned int hsub, vsub;
+
+	DBG("create framebuffer: dev=%p, mode_cmd=%p (%dx%d@%4.4s)",
+			dev, mode_cmd, mode_cmd->width, mode_cmd->height,
+			(char *)&mode_cmd->pixel_format);
+
+	n = drm_format_num_planes(mode_cmd->pixel_format);
+	hsub = drm_format_horz_chroma_subsampling(mode_cmd->pixel_format);
+	vsub = drm_format_vert_chroma_subsampling(mode_cmd->pixel_format);
+
+	format = kms->funcs->get_format(kms, mode_cmd->pixel_format);
+	if (!format) {
+		dev_err(dev->dev, "unsupported pixel format: %4.4s\n",
+				(char *)&mode_cmd->pixel_format);
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	msm_fb = kzalloc(sizeof(*msm_fb), GFP_KERNEL);
+	if (!msm_fb) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	fb = &msm_fb->base;
+
+	msm_fb->format = format;
+
+	for (i = 0; i < n; i++) {
+		unsigned int width = mode_cmd->width / (i ? hsub : 1);
+		unsigned int height = mode_cmd->height / (i ? vsub : 1);
+		unsigned int min_size;
+
+		min_size = (height - 1) * mode_cmd->pitches[i]
+			 + width * drm_format_plane_cpp(mode_cmd->pixel_format, i)
+			 + mode_cmd->offsets[i];
+
+		if (bos[i]->size < min_size) {
+			ret = -EINVAL;
+			goto fail;
+		}
+
+		msm_fb->planes[i] = bos[i];
+	}
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	ret = drm_framebuffer_init(dev, fb, &msm_framebuffer_funcs);
+	if (ret) {
+		dev_err(dev->dev, "framebuffer init failed: %d\n", ret);
+		goto fail;
+	}
+
+	DBG("create: FB ID: %d (%p)", fb->base.id, fb);
+
+	return fb;
+
+fail:
+	if (fb)
+		msm_framebuffer_destroy(fb);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
new file mode 100644
index 0000000..6c6d7d4
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_drv.h"
+
+#include "drm_crtc.h"
+#include "drm_fb_helper.h"
+
+/*
+ * fbdev funcs, to implement legacy fbdev interface on top of drm driver
+ */
+
+#define to_msm_fbdev(x) container_of(x, struct msm_fbdev, base)
+
+struct msm_fbdev {
+	struct drm_fb_helper base;
+	struct drm_framebuffer *fb;
+	struct drm_gem_object *bo;
+};
+
+static struct fb_ops msm_fb_ops = {
+	.owner = THIS_MODULE,
+
+	/* Note: to properly handle manual update displays, we wrap the
+	 * basic fbdev ops which write to the framebuffer
+	 */
+	.fb_read = fb_sys_read,
+	.fb_write = fb_sys_write,
+	.fb_fillrect = sys_fillrect,
+	.fb_copyarea = sys_copyarea,
+	.fb_imageblit = sys_imageblit,
+
+	.fb_check_var = drm_fb_helper_check_var,
+	.fb_set_par = drm_fb_helper_set_par,
+	.fb_pan_display = drm_fb_helper_pan_display,
+	.fb_blank = drm_fb_helper_blank,
+	.fb_setcmap = drm_fb_helper_setcmap,
+};
+
+static int msm_fbdev_create(struct drm_fb_helper *helper,
+		struct drm_fb_helper_surface_size *sizes)
+{
+	struct msm_fbdev *fbdev = to_msm_fbdev(helper);
+	struct drm_device *dev = helper->dev;
+	struct drm_framebuffer *fb = NULL;
+	struct fb_info *fbi = NULL;
+	struct drm_mode_fb_cmd2 mode_cmd = {0};
+	dma_addr_t paddr;
+	int ret, size;
+
+	/* only doing ARGB32 since this is what is needed to alpha-blend
+	 * with video overlays:
+	 */
+	sizes->surface_bpp = 32;
+	sizes->surface_depth = 32;
+
+	DBG("create fbdev: %dx%d@%d (%dx%d)", sizes->surface_width,
+			sizes->surface_height, sizes->surface_bpp,
+			sizes->fb_width, sizes->fb_height);
+
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+			sizes->surface_depth);
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+
+	mode_cmd.pitches[0] = align_pitch(
+			mode_cmd.width, sizes->surface_bpp);
+
+	/* allocate backing bo */
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	DBG("allocating %d bytes for fb %d", size, dev->primary->index);
+	mutex_lock(&dev->struct_mutex);
+	fbdev->bo = msm_gem_new(dev, size, MSM_BO_SCANOUT | MSM_BO_WC);
+	mutex_unlock(&dev->struct_mutex);
+	if (IS_ERR(fbdev->bo)) {
+		ret = PTR_ERR(fbdev->bo);
+		fbdev->bo = NULL;
+		dev_err(dev->dev, "failed to allocate buffer object: %d\n", ret);
+		goto fail;
+	}
+
+	fb = msm_framebuffer_init(dev, &mode_cmd, &fbdev->bo);
+	if (IS_ERR(fb)) {
+		dev_err(dev->dev, "failed to allocate fb\n");
+		/* note: if fb creation failed, we can't rely on fb destroy
+		 * to unref the bo:
+		 */
+		drm_gem_object_unreference(fbdev->bo);
+		ret = PTR_ERR(fb);
+		goto fail;
+	}
+
+	mutex_lock(&dev->struct_mutex);
+
+	/* TODO implement our own fb_mmap so we don't need this: */
+	msm_gem_get_iova_locked(fbdev->bo, 0, &paddr);
+
+	fbi = framebuffer_alloc(0, dev->dev);
+	if (!fbi) {
+		dev_err(dev->dev, "failed to allocate fb info\n");
+		ret = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	DBG("fbi=%p, dev=%p", fbi, dev);
+
+	fbdev->fb = fb;
+	helper->fb = fb;
+	helper->fbdev = fbi;
+
+	fbi->par = helper;
+	fbi->flags = FBINFO_DEFAULT;
+	fbi->fbops = &msm_fb_ops;
+
+	strcpy(fbi->fix.id, "msm");
+
+	ret = fb_alloc_cmap(&fbi->cmap, 256, 0);
+	if (ret) {
+		ret = -ENOMEM;
+		goto fail_unlock;
+	}
+
+	drm_fb_helper_fill_fix(fbi, fb->pitches[0], fb->depth);
+	drm_fb_helper_fill_var(fbi, helper, sizes->fb_width, sizes->fb_height);
+
+	dev->mode_config.fb_base = paddr;
+
+	fbi->screen_base = msm_gem_vaddr_locked(fbdev->bo);
+	fbi->screen_size = fbdev->bo->size;
+	fbi->fix.smem_start = paddr;
+	fbi->fix.smem_len = fbdev->bo->size;
+
+	DBG("par=%p, %dx%d", fbi->par, fbi->var.xres, fbi->var.yres);
+	DBG("allocated %dx%d fb", fbdev->fb->width, fbdev->fb->height);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+
+fail_unlock:
+	mutex_unlock(&dev->struct_mutex);
+fail:
+
+	if (ret) {
+		if (fbi)
+			framebuffer_release(fbi);
+		if (fb) {
+			drm_framebuffer_unregister_private(fb);
+			drm_framebuffer_remove(fb);
+		}
+	}
+
+	return ret;
+}
+
+static void msm_crtc_fb_gamma_set(struct drm_crtc *crtc,
+		u16 red, u16 green, u16 blue, int regno)
+{
+	DBG("fbdev: set gamma");
+}
+
+static void msm_crtc_fb_gamma_get(struct drm_crtc *crtc,
+		u16 *red, u16 *green, u16 *blue, int regno)
+{
+	DBG("fbdev: get gamma");
+}
+
+static struct drm_fb_helper_funcs msm_fb_helper_funcs = {
+	.gamma_set = msm_crtc_fb_gamma_set,
+	.gamma_get = msm_crtc_fb_gamma_get,
+	.fb_probe = msm_fbdev_create,
+};
+
+/* initialize fbdev helper */
+struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_fbdev *fbdev = NULL;
+	struct drm_fb_helper *helper;
+	int ret = 0;
+
+	fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
+	if (!fbdev)
+		goto fail;
+
+	helper = &fbdev->base;
+
+	helper->funcs = &msm_fb_helper_funcs;
+
+	ret = drm_fb_helper_init(dev, helper,
+			priv->num_crtcs, priv->num_connectors);
+	if (ret) {
+		dev_err(dev->dev, "could not init fbdev: ret=%d\n", ret);
+		goto fail;
+	}
+
+	drm_fb_helper_single_add_all_connectors(helper);
+
+	/* disable all the possible outputs/crtcs before entering KMS mode */
+	drm_helper_disable_unused_functions(dev);
+
+	drm_fb_helper_initial_config(helper, 32);
+
+	priv->fbdev = helper;
+
+	return helper;
+
+fail:
+	kfree(fbdev);
+	return NULL;
+}
+
+void msm_fbdev_free(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_fb_helper *helper = priv->fbdev;
+	struct msm_fbdev *fbdev;
+	struct fb_info *fbi;
+
+	DBG();
+
+	fbi = helper->fbdev;
+
+	/* only cleanup framebuffer if it is present */
+	if (fbi) {
+		unregister_framebuffer(fbi);
+		framebuffer_release(fbi);
+	}
+
+	drm_fb_helper_fini(helper);
+
+	fbdev = to_msm_fbdev(priv->fbdev);
+
+	/* this will free the backing object */
+	if (fbdev->fb) {
+		drm_framebuffer_unregister_private(fbdev->fb);
+		drm_framebuffer_remove(fbdev->fb);
+	}
+
+	kfree(fbdev);
+
+	priv->fbdev = NULL;
+}
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
new file mode 100644
index 0000000..6b5a6c8
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -0,0 +1,597 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/shmem_fs.h>
+
+#include "msm_drv.h"
+#include "msm_gem.h"
+#include "msm_gpu.h"
+
+
+/* called with dev->struct_mutex held */
+static struct page **get_pages(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	if (!msm_obj->pages) {
+		struct drm_device *dev = obj->dev;
+		struct page **p = drm_gem_get_pages(obj, 0);
+		int npages = obj->size >> PAGE_SHIFT;
+
+		if (IS_ERR(p)) {
+			dev_err(dev->dev, "could not get pages: %ld\n",
+					PTR_ERR(p));
+			return p;
+		}
+
+		msm_obj->sgt = drm_prime_pages_to_sg(p, npages);
+		if (!msm_obj->sgt) {
+			dev_err(dev->dev, "failed to allocate sgt\n");
+			return ERR_PTR(-ENOMEM);
+		}
+
+		msm_obj->pages = p;
+
+		/* For non-cached buffers, ensure the new pages are clean
+		 * because display controller, GPU, etc. are not coherent:
+		 */
+		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
+			dma_map_sg(dev->dev, msm_obj->sgt->sgl,
+					msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+	}
+
+	return msm_obj->pages;
+}
+
+static void put_pages(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	if (msm_obj->pages) {
+		/* For non-cached buffers, ensure the new pages are clean
+		 * because display controller, GPU, etc. are not coherent:
+		 */
+		if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
+			dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
+					msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
+		sg_free_table(msm_obj->sgt);
+		kfree(msm_obj->sgt);
+
+		drm_gem_put_pages(obj, msm_obj->pages, true, false);
+		msm_obj->pages = NULL;
+	}
+}
+
+int msm_gem_mmap_obj(struct drm_gem_object *obj,
+		struct vm_area_struct *vma)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	if (msm_obj->flags & MSM_BO_WC) {
+		vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	} else if (msm_obj->flags & MSM_BO_UNCACHED) {
+		vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+	} else {
+		/*
+		 * Shunt off cached objs to shmem file so they have their own
+		 * address_space (so unmap_mapping_range does what we want,
+		 * in particular in the case of mmap'd dmabufs)
+		 */
+		fput(vma->vm_file);
+		get_file(obj->filp);
+		vma->vm_pgoff = 0;
+		vma->vm_file  = obj->filp;
+
+		vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	}
+
+	return 0;
+}
+
+int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret) {
+		DBG("mmap failed: %d", ret);
+		return ret;
+	}
+
+	return msm_gem_mmap_obj(vma->vm_private_data, vma);
+}
+
+int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	struct drm_device *dev = obj->dev;
+	struct page **pages;
+	unsigned long pfn;
+	pgoff_t pgoff;
+	int ret;
+
+	/* Make sure we don't parallel update on a fault, nor move or remove
+	 * something from beneath our feet
+	 */
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		goto out;
+
+	/* make sure we have pages attached now */
+	pages = get_pages(obj);
+	if (IS_ERR(pages)) {
+		ret = PTR_ERR(pages);
+		goto out_unlock;
+	}
+
+	/* We don't use vmf->pgoff since that has the fake offset: */
+	pgoff = ((unsigned long)vmf->virtual_address -
+			vma->vm_start) >> PAGE_SHIFT;
+
+	pfn = page_to_pfn(msm_obj->pages[pgoff]);
+
+	VERB("Inserting %p pfn %lx, pa %lx", vmf->virtual_address,
+			pfn, pfn << PAGE_SHIFT);
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+out_unlock:
+	mutex_unlock(&dev->struct_mutex);
+out:
+	switch (ret) {
+	case -EAGAIN:
+		set_need_resched();
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+		return VM_FAULT_NOPAGE;
+	case -ENOMEM:
+		return VM_FAULT_OOM;
+	default:
+		return VM_FAULT_SIGBUS;
+	}
+}
+
+/** get mmap offset */
+static uint64_t mmap_offset(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	int ret;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	/* Make it mmapable */
+	ret = drm_gem_create_mmap_offset(obj);
+
+	if (ret) {
+		dev_err(dev->dev, "could not allocate mmap offset\n");
+		return 0;
+	}
+
+	return drm_vma_node_offset_addr(&obj->vma_node);
+}
+
+uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj)
+{
+	uint64_t offset;
+	mutex_lock(&obj->dev->struct_mutex);
+	offset = mmap_offset(obj);
+	mutex_unlock(&obj->dev->struct_mutex);
+	return offset;
+}
+
+/* helpers for dealing w/ iommu: */
+static int map_range(struct iommu_domain *domain, unsigned int iova,
+		struct sg_table *sgt, unsigned int len, int prot)
+{
+	struct scatterlist *sg;
+	unsigned int da = iova;
+	unsigned int i, j;
+	int ret;
+
+	if (!domain || !sgt)
+		return -EINVAL;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		u32 pa = sg_phys(sg) - sg->offset;
+		size_t bytes = sg->length + sg->offset;
+
+		VERB("map[%d]: %08x %08x(%x)", i, iova, pa, bytes);
+
+		ret = iommu_map(domain, da, pa, bytes, prot);
+		if (ret)
+			goto fail;
+
+		da += bytes;
+	}
+
+	return 0;
+
+fail:
+	da = iova;
+
+	for_each_sg(sgt->sgl, sg, i, j) {
+		size_t bytes = sg->length + sg->offset;
+		iommu_unmap(domain, da, bytes);
+		da += bytes;
+	}
+	return ret;
+}
+
+static void unmap_range(struct iommu_domain *domain, unsigned int iova,
+		struct sg_table *sgt, unsigned int len)
+{
+	struct scatterlist *sg;
+	unsigned int da = iova;
+	int i;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		size_t bytes = sg->length + sg->offset;
+		size_t unmapped;
+
+		unmapped = iommu_unmap(domain, da, bytes);
+		if (unmapped < bytes)
+			break;
+
+		VERB("unmap[%d]: %08x(%x)", i, iova, bytes);
+
+		BUG_ON(!IS_ALIGNED(bytes, PAGE_SIZE));
+
+		da += bytes;
+	}
+}
+
+/* should be called under struct_mutex.. although it can be called
+ * from atomic context without struct_mutex to acquire an extra
+ * iova ref if you know one is already held.
+ *
+ * That means when I do eventually need to add support for unpinning
+ * the refcnt counter needs to be atomic_t.
+ */
+int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id,
+		uint32_t *iova)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	int ret = 0;
+
+	if (!msm_obj->domain[id].iova) {
+		struct msm_drm_private *priv = obj->dev->dev_private;
+		uint32_t offset = (uint32_t)mmap_offset(obj);
+		struct page **pages;
+		pages = get_pages(obj);
+		if (IS_ERR(pages))
+			return PTR_ERR(pages);
+		// XXX ideally we would not map buffers writable when not needed...
+		ret = map_range(priv->iommus[id], offset, msm_obj->sgt,
+				obj->size, IOMMU_READ | IOMMU_WRITE);
+		msm_obj->domain[id].iova = offset;
+	}
+
+	if (!ret)
+		*iova = msm_obj->domain[id].iova;
+
+	return ret;
+}
+
+int msm_gem_get_iova(struct drm_gem_object *obj, int id, uint32_t *iova)
+{
+	int ret;
+	mutex_lock(&obj->dev->struct_mutex);
+	ret = msm_gem_get_iova_locked(obj, id, iova);
+	mutex_unlock(&obj->dev->struct_mutex);
+	return ret;
+}
+
+void msm_gem_put_iova(struct drm_gem_object *obj, int id)
+{
+	// XXX TODO ..
+	// NOTE: probably don't need a _locked() version.. we wouldn't
+	// normally unmap here, but instead just mark that it could be
+	// unmapped (if the iova refcnt drops to zero), but then later
+	// if another _get_iova_locked() fails we can start unmapping
+	// things that are no longer needed..
+}
+
+int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
+		struct drm_mode_create_dumb *args)
+{
+	args->pitch = align_pitch(args->width, args->bpp);
+	args->size  = PAGE_ALIGN(args->pitch * args->height);
+	return msm_gem_new_handle(dev, file, args->size,
+			MSM_BO_SCANOUT | MSM_BO_WC, &args->handle);
+}
+
+int msm_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
+		uint32_t handle)
+{
+	/* No special work needed, drop the reference and see what falls out */
+	return drm_gem_handle_delete(file, handle);
+}
+
+int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
+		uint32_t handle, uint64_t *offset)
+{
+	struct drm_gem_object *obj;
+	int ret = 0;
+
+	/* GEM does all our handle to object mapping */
+	obj = drm_gem_object_lookup(dev, file, handle);
+	if (obj == NULL) {
+		ret = -ENOENT;
+		goto fail;
+	}
+
+	*offset = msm_gem_mmap_offset(obj);
+
+	drm_gem_object_unreference_unlocked(obj);
+
+fail:
+	return ret;
+}
+
+void *msm_gem_vaddr_locked(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
+	if (!msm_obj->vaddr) {
+		struct page **pages = get_pages(obj);
+		if (IS_ERR(pages))
+			return ERR_CAST(pages);
+		msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
+				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+	}
+	return msm_obj->vaddr;
+}
+
+void *msm_gem_vaddr(struct drm_gem_object *obj)
+{
+	void *ret;
+	mutex_lock(&obj->dev->struct_mutex);
+	ret = msm_gem_vaddr_locked(obj);
+	mutex_unlock(&obj->dev->struct_mutex);
+	return ret;
+}
+
+int msm_gem_queue_inactive_work(struct drm_gem_object *obj,
+		struct work_struct *work)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	int ret = 0;
+
+	mutex_lock(&dev->struct_mutex);
+	if (!list_empty(&work->entry)) {
+		ret = -EINVAL;
+	} else if (is_active(msm_obj)) {
+		list_add_tail(&work->entry, &msm_obj->inactive_work);
+	} else {
+		queue_work(priv->wq, work);
+	}
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+
+void msm_gem_move_to_active(struct drm_gem_object *obj,
+		struct msm_gpu *gpu, uint32_t fence)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	msm_obj->gpu = gpu;
+	msm_obj->fence = fence;
+	list_del_init(&msm_obj->mm_list);
+	list_add_tail(&msm_obj->mm_list, &gpu->active_list);
+}
+
+void msm_gem_move_to_inactive(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	msm_obj->gpu = NULL;
+	msm_obj->fence = 0;
+	list_del_init(&msm_obj->mm_list);
+	list_add_tail(&msm_obj->mm_list, &priv->inactive_list);
+
+	while (!list_empty(&msm_obj->inactive_work)) {
+		struct work_struct *work;
+
+		work = list_first_entry(&msm_obj->inactive_work,
+				struct work_struct, entry);
+
+		list_del_init(&work->entry);
+		queue_work(priv->wq, work);
+	}
+}
+
+int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op,
+		struct timespec *timeout)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	int ret = 0;
+
+	if (is_active(msm_obj) && !(op & MSM_PREP_NOSYNC))
+		ret = msm_wait_fence_interruptable(dev, msm_obj->fence, timeout);
+
+	/* TODO cache maintenance */
+
+	return ret;
+}
+
+int msm_gem_cpu_fini(struct drm_gem_object *obj)
+{
+	/* TODO cache maintenance */
+	return 0;
+}
+
+#ifdef CONFIG_DEBUG_FS
+void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	uint64_t off = drm_vma_node_start(&obj->vma_node);
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	seq_printf(m, "%08x: %c(%d) %2d (%2d) %08llx %p %d\n",
+			msm_obj->flags, is_active(msm_obj) ? 'A' : 'I',
+			msm_obj->fence, obj->name, obj->refcount.refcount.counter,
+			off, msm_obj->vaddr, obj->size);
+}
+
+void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
+{
+	struct msm_gem_object *msm_obj;
+	int count = 0;
+	size_t size = 0;
+
+	list_for_each_entry(msm_obj, list, mm_list) {
+		struct drm_gem_object *obj = &msm_obj->base;
+		seq_printf(m, "   ");
+		msm_gem_describe(obj, m);
+		count++;
+		size += obj->size;
+	}
+
+	seq_printf(m, "Total %d objects, %zu bytes\n", count, size);
+}
+#endif
+
+void msm_gem_free_object(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	int id;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	/* object should not be on active list: */
+	WARN_ON(is_active(msm_obj));
+
+	list_del(&msm_obj->mm_list);
+
+	for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) {
+		if (msm_obj->domain[id].iova) {
+			struct msm_drm_private *priv = obj->dev->dev_private;
+			uint32_t offset = (uint32_t)mmap_offset(obj);
+			unmap_range(priv->iommus[id], offset,
+					msm_obj->sgt, obj->size);
+		}
+	}
+
+	drm_gem_free_mmap_offset(obj);
+
+	if (msm_obj->vaddr)
+		vunmap(msm_obj->vaddr);
+
+	put_pages(obj);
+
+	if (msm_obj->resv == &msm_obj->_resv)
+		reservation_object_fini(msm_obj->resv);
+
+	drm_gem_object_release(obj);
+
+	kfree(msm_obj);
+}
+
+/* convenience method to construct a GEM buffer object, and userspace handle */
+int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file,
+		uint32_t size, uint32_t flags, uint32_t *handle)
+{
+	struct drm_gem_object *obj;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	obj = msm_gem_new(dev, size, flags);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	ret = drm_gem_handle_create(file, obj, handle);
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(obj);
+
+	return ret;
+}
+
+struct drm_gem_object *msm_gem_new(struct drm_device *dev,
+		uint32_t size, uint32_t flags)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_gem_object *msm_obj;
+	struct drm_gem_object *obj = NULL;
+	int ret;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	size = PAGE_ALIGN(size);
+
+	switch (flags & MSM_BO_CACHE_MASK) {
+	case MSM_BO_UNCACHED:
+	case MSM_BO_CACHED:
+	case MSM_BO_WC:
+		break;
+	default:
+		dev_err(dev->dev, "invalid cache flag: %x\n",
+				(flags & MSM_BO_CACHE_MASK));
+		ret = -EINVAL;
+		goto fail;
+	}
+
+	msm_obj = kzalloc(sizeof(*msm_obj), GFP_KERNEL);
+	if (!msm_obj) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	obj = &msm_obj->base;
+
+	ret = drm_gem_object_init(dev, obj, size);
+	if (ret)
+		goto fail;
+
+	msm_obj->flags = flags;
+
+	msm_obj->resv = &msm_obj->_resv;
+	reservation_object_init(msm_obj->resv);
+
+	INIT_LIST_HEAD(&msm_obj->submit_entry);
+	INIT_LIST_HEAD(&msm_obj->inactive_work);
+	list_add_tail(&msm_obj->mm_list, &priv->inactive_list);
+
+	return obj;
+
+fail:
+	if (obj)
+		drm_gem_object_unreference_unlocked(obj);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
new file mode 100644
index 0000000..d746f13
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MSM_GEM_H__
+#define __MSM_GEM_H__
+
+#include <linux/reservation.h>
+#include "msm_drv.h"
+
+struct msm_gem_object {
+	struct drm_gem_object base;
+
+	uint32_t flags;
+
+	/* And object is either:
+	 *  inactive - on priv->inactive_list
+	 *  active   - on one one of the gpu's active_list..  well, at
+	 *     least for now we don't have (I don't think) hw sync between
+	 *     2d and 3d one devices which have both, meaning we need to
+	 *     block on submit if a bo is already on other ring
+	 *
+	 */
+	struct list_head mm_list;
+	struct msm_gpu *gpu;     /* non-null if active */
+	uint32_t fence;
+
+	/* Transiently in the process of submit ioctl, objects associated
+	 * with the submit are on submit->bo_list.. this only lasts for
+	 * the duration of the ioctl, so one bo can never be on multiple
+	 * submit lists.
+	 */
+	struct list_head submit_entry;
+
+	/* work defered until bo is inactive: */
+	struct list_head inactive_work;
+
+	struct page **pages;
+	struct sg_table *sgt;
+	void *vaddr;
+
+	struct {
+		// XXX
+		uint32_t iova;
+	} domain[NUM_DOMAINS];
+
+	/* normally (resv == &_resv) except for imported bo's */
+	struct reservation_object *resv;
+	struct reservation_object _resv;
+};
+#define to_msm_bo(x) container_of(x, struct msm_gem_object, base)
+
+static inline bool is_active(struct msm_gem_object *msm_obj)
+{
+	return msm_obj->gpu != NULL;
+}
+
+#define MAX_CMDS 4
+
+/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
+ * associated with the cmdstream submission for synchronization (and
+ * make it easier to unwind when things go wrong, etc).  This only
+ * lasts for the duration of the submit-ioctl.
+ */
+struct msm_gem_submit {
+	struct drm_device *dev;
+	struct msm_gpu *gpu;
+	struct list_head bo_list;
+	struct ww_acquire_ctx ticket;
+	uint32_t fence;
+	bool valid;
+	unsigned int nr_cmds;
+	unsigned int nr_bos;
+	struct {
+		uint32_t type;
+		uint32_t size;  /* in dwords */
+		uint32_t iova;
+	} cmd[MAX_CMDS];
+	struct {
+		uint32_t flags;
+		struct msm_gem_object *obj;
+		uint32_t iova;
+	} bos[0];
+};
+
+#endif /* __MSM_GEM_H__ */
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
new file mode 100644
index 0000000..3e1ef3a
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -0,0 +1,412 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_drv.h"
+#include "msm_gpu.h"
+#include "msm_gem.h"
+
+/*
+ * Cmdstream submission:
+ */
+
+#define BO_INVALID_FLAGS ~(MSM_SUBMIT_BO_READ | MSM_SUBMIT_BO_WRITE)
+/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
+#define BO_VALID    0x8000
+#define BO_LOCKED   0x4000
+#define BO_PINNED   0x2000
+
+static inline void __user *to_user_ptr(u64 address)
+{
+	return (void __user *)(uintptr_t)address;
+}
+
+static struct msm_gem_submit *submit_create(struct drm_device *dev,
+		struct msm_gpu *gpu, int nr)
+{
+	struct msm_gem_submit *submit;
+	int sz = sizeof(*submit) + (nr * sizeof(submit->bos[0]));
+
+	submit = kmalloc(sz, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+	if (submit) {
+		submit->dev = dev;
+		submit->gpu = gpu;
+
+		/* initially, until copy_from_user() and bo lookup succeeds: */
+		submit->nr_bos = 0;
+		submit->nr_cmds = 0;
+
+		INIT_LIST_HEAD(&submit->bo_list);
+		ww_acquire_init(&submit->ticket, &reservation_ww_class);
+	}
+
+	return submit;
+}
+
+static int submit_lookup_objects(struct msm_gem_submit *submit,
+		struct drm_msm_gem_submit *args, struct drm_file *file)
+{
+	unsigned i;
+	int ret = 0;
+
+	spin_lock(&file->table_lock);
+
+	for (i = 0; i < args->nr_bos; i++) {
+		struct drm_msm_gem_submit_bo submit_bo;
+		struct drm_gem_object *obj;
+		struct msm_gem_object *msm_obj;
+		void __user *userptr =
+			to_user_ptr(args->bos + (i * sizeof(submit_bo)));
+
+		ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
+		if (ret) {
+			ret = -EFAULT;
+			goto out_unlock;
+		}
+
+		if (submit_bo.flags & BO_INVALID_FLAGS) {
+			DBG("invalid flags: %x", submit_bo.flags);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		submit->bos[i].flags = submit_bo.flags;
+		/* in validate_objects() we figure out if this is true: */
+		submit->bos[i].iova  = submit_bo.presumed;
+
+		/* normally use drm_gem_object_lookup(), but for bulk lookup
+		 * all under single table_lock just hit object_idr directly:
+		 */
+		obj = idr_find(&file->object_idr, submit_bo.handle);
+		if (!obj) {
+			DBG("invalid handle %u at index %u", submit_bo.handle, i);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		msm_obj = to_msm_bo(obj);
+
+		if (!list_empty(&msm_obj->submit_entry)) {
+			DBG("handle %u at index %u already on submit list",
+					submit_bo.handle, i);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
+
+		drm_gem_object_reference(obj);
+
+		submit->bos[i].obj = msm_obj;
+
+		list_add_tail(&msm_obj->submit_entry, &submit->bo_list);
+	}
+
+out_unlock:
+	submit->nr_bos = i;
+	spin_unlock(&file->table_lock);
+
+	return ret;
+}
+
+static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
+{
+	struct msm_gem_object *msm_obj = submit->bos[i].obj;
+
+	if (submit->bos[i].flags & BO_PINNED)
+		msm_gem_put_iova(&msm_obj->base, submit->gpu->id);
+
+	if (submit->bos[i].flags & BO_LOCKED)
+		ww_mutex_unlock(&msm_obj->resv->lock);
+
+	if (!(submit->bos[i].flags & BO_VALID))
+		submit->bos[i].iova = 0;
+
+	submit->bos[i].flags &= ~(BO_LOCKED | BO_PINNED);
+}
+
+/* This is where we make sure all the bo's are reserved and pin'd: */
+static int submit_validate_objects(struct msm_gem_submit *submit)
+{
+	int contended, slow_locked = -1, i, ret = 0;
+
+retry:
+	submit->valid = true;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct msm_gem_object *msm_obj = submit->bos[i].obj;
+		uint32_t iova;
+
+		if (slow_locked == i)
+			slow_locked = -1;
+
+		contended = i;
+
+		if (!(submit->bos[i].flags & BO_LOCKED)) {
+			ret = ww_mutex_lock_interruptible(&msm_obj->resv->lock,
+					&submit->ticket);
+			if (ret)
+				goto fail;
+			submit->bos[i].flags |= BO_LOCKED;
+		}
+
+
+		/* if locking succeeded, pin bo: */
+		ret = msm_gem_get_iova(&msm_obj->base,
+				submit->gpu->id, &iova);
+
+		/* this would break the logic in the fail path.. there is no
+		 * reason for this to happen, but just to be on the safe side
+		 * let's notice if this starts happening in the future:
+		 */
+		WARN_ON(ret == -EDEADLK);
+
+		if (ret)
+			goto fail;
+
+		submit->bos[i].flags |= BO_PINNED;
+
+		if (iova == submit->bos[i].iova) {
+			submit->bos[i].flags |= BO_VALID;
+		} else {
+			submit->bos[i].iova = iova;
+			submit->bos[i].flags &= ~BO_VALID;
+			submit->valid = false;
+		}
+	}
+
+	ww_acquire_done(&submit->ticket);
+
+	return 0;
+
+fail:
+	for (; i >= 0; i--)
+		submit_unlock_unpin_bo(submit, i);
+
+	if (slow_locked > 0)
+		submit_unlock_unpin_bo(submit, slow_locked);
+
+	if (ret == -EDEADLK) {
+		struct msm_gem_object *msm_obj = submit->bos[contended].obj;
+		/* we lost out in a seqno race, lock and retry.. */
+		ret = ww_mutex_lock_slow_interruptible(&msm_obj->resv->lock,
+				&submit->ticket);
+		if (!ret) {
+			submit->bos[contended].flags |= BO_LOCKED;
+			slow_locked = contended;
+			goto retry;
+		}
+	}
+
+	return ret;
+}
+
+static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
+		struct msm_gem_object **obj, uint32_t *iova, bool *valid)
+{
+	if (idx >= submit->nr_bos) {
+		DBG("invalid buffer index: %u (out of %u)", idx, submit->nr_bos);
+		return EINVAL;
+	}
+
+	if (obj)
+		*obj = submit->bos[idx].obj;
+	if (iova)
+		*iova = submit->bos[idx].iova;
+	if (valid)
+		*valid = !!(submit->bos[idx].flags & BO_VALID);
+
+	return 0;
+}
+
+/* process the reloc's and patch up the cmdstream as needed: */
+static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *obj,
+		uint32_t offset, uint32_t nr_relocs, uint64_t relocs)
+{
+	uint32_t i, last_offset = 0;
+	uint32_t *ptr;
+	int ret;
+
+	if (offset % 4) {
+		DBG("non-aligned cmdstream buffer: %u", offset);
+		return -EINVAL;
+	}
+
+	/* For now, just map the entire thing.  Eventually we probably
+	 * to do it page-by-page, w/ kmap() if not vmap()d..
+	 */
+	ptr = msm_gem_vaddr(&obj->base);
+
+	if (IS_ERR(ptr)) {
+		ret = PTR_ERR(ptr);
+		DBG("failed to map: %d", ret);
+		return ret;
+	}
+
+	for (i = 0; i < nr_relocs; i++) {
+		struct drm_msm_gem_submit_reloc submit_reloc;
+		void __user *userptr =
+			to_user_ptr(relocs + (i * sizeof(submit_reloc)));
+		uint32_t iova, off;
+		bool valid;
+
+		ret = copy_from_user(&submit_reloc, userptr, sizeof(submit_reloc));
+		if (ret)
+			return -EFAULT;
+
+		if (submit_reloc.submit_offset % 4) {
+			DBG("non-aligned reloc offset: %u",
+					submit_reloc.submit_offset);
+			return -EINVAL;
+		}
+
+		/* offset in dwords: */
+		off = submit_reloc.submit_offset / 4;
+
+		if ((off >= (obj->base.size / 4)) ||
+				(off < last_offset)) {
+			DBG("invalid offset %u at reloc %u", off, i);
+			return -EINVAL;
+		}
+
+		ret = submit_bo(submit, submit_reloc.reloc_idx, NULL, &iova, &valid);
+		if (ret)
+			return ret;
+
+		if (valid)
+			continue;
+
+		iova += submit_reloc.reloc_offset;
+
+		if (submit_reloc.shift < 0)
+			iova >>= -submit_reloc.shift;
+		else
+			iova <<= submit_reloc.shift;
+
+		ptr[off] = iova | submit_reloc.or;
+
+		last_offset = off;
+	}
+
+	return 0;
+}
+
+static void submit_cleanup(struct msm_gem_submit *submit, bool fail)
+{
+	unsigned i;
+
+	mutex_lock(&submit->dev->struct_mutex);
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct msm_gem_object *msm_obj = submit->bos[i].obj;
+		submit_unlock_unpin_bo(submit, i);
+		list_del_init(&msm_obj->submit_entry);
+		drm_gem_object_unreference(&msm_obj->base);
+	}
+	mutex_unlock(&submit->dev->struct_mutex);
+
+	ww_acquire_fini(&submit->ticket);
+	kfree(submit);
+}
+
+int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct drm_msm_gem_submit *args = data;
+	struct msm_file_private *ctx = file->driver_priv;
+	struct msm_gem_submit *submit;
+	struct msm_gpu *gpu;
+	unsigned i;
+	int ret;
+
+	/* for now, we just have 3d pipe.. eventually this would need to
+	 * be more clever to dispatch to appropriate gpu module:
+	 */
+	if (args->pipe != MSM_PIPE_3D0)
+		return -EINVAL;
+
+	gpu = priv->gpu;
+
+	if (args->nr_cmds > MAX_CMDS)
+		return -EINVAL;
+
+	submit = submit_create(dev, gpu, args->nr_bos);
+	if (!submit) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = submit_lookup_objects(submit, args, file);
+	if (ret)
+		goto out;
+
+	ret = submit_validate_objects(submit);
+	if (ret)
+		goto out;
+
+	for (i = 0; i < args->nr_cmds; i++) {
+		struct drm_msm_gem_submit_cmd submit_cmd;
+		void __user *userptr =
+			to_user_ptr(args->cmds + (i * sizeof(submit_cmd)));
+		struct msm_gem_object *msm_obj;
+		uint32_t iova;
+
+		ret = copy_from_user(&submit_cmd, userptr, sizeof(submit_cmd));
+		if (ret) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		ret = submit_bo(submit, submit_cmd.submit_idx,
+				&msm_obj, &iova, NULL);
+		if (ret)
+			goto out;
+
+		if (submit_cmd.size % 4) {
+			DBG("non-aligned cmdstream buffer size: %u",
+					submit_cmd.size);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (submit_cmd.size >= msm_obj->base.size) {
+			DBG("invalid cmdstream size: %u", submit_cmd.size);
+			ret = -EINVAL;
+			goto out;
+		}
+
+		submit->cmd[i].type = submit_cmd.type;
+		submit->cmd[i].size = submit_cmd.size / 4;
+		submit->cmd[i].iova = iova + submit_cmd.submit_offset;
+
+		if (submit->valid)
+			continue;
+
+		ret = submit_reloc(submit, msm_obj, submit_cmd.submit_offset,
+				submit_cmd.nr_relocs, submit_cmd.relocs);
+		if (ret)
+			goto out;
+	}
+
+	submit->nr_cmds = i;
+
+	ret = msm_gpu_submit(gpu, submit, ctx);
+
+	args->fence = submit->fence;
+
+out:
+	if (submit)
+		submit_cleanup(submit, !!ret);
+	return ret;
+}
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
new file mode 100644
index 0000000..e1e1ec9
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -0,0 +1,463 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_gpu.h"
+#include "msm_gem.h"
+
+
+/*
+ * Power Management:
+ */
+
+#ifdef CONFIG_MSM_BUS_SCALING
+#include <mach/board.h>
+#include <mach/kgsl.h>
+static void bs_init(struct msm_gpu *gpu, struct platform_device *pdev)
+{
+	struct drm_device *dev = gpu->dev;
+	struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
+
+	if (!pdev) {
+		dev_err(dev->dev, "could not find dtv pdata\n");
+		return;
+	}
+
+	if (pdata->bus_scale_table) {
+		gpu->bsc = msm_bus_scale_register_client(pdata->bus_scale_table);
+		DBG("bus scale client: %08x", gpu->bsc);
+	}
+}
+
+static void bs_fini(struct msm_gpu *gpu)
+{
+	if (gpu->bsc) {
+		msm_bus_scale_unregister_client(gpu->bsc);
+		gpu->bsc = 0;
+	}
+}
+
+static void bs_set(struct msm_gpu *gpu, int idx)
+{
+	if (gpu->bsc) {
+		DBG("set bus scaling: %d", idx);
+		msm_bus_scale_client_update_request(gpu->bsc, idx);
+	}
+}
+#else
+static void bs_init(struct msm_gpu *gpu, struct platform_device *pdev) {}
+static void bs_fini(struct msm_gpu *gpu) {}
+static void bs_set(struct msm_gpu *gpu, int idx) {}
+#endif
+
+static int enable_pwrrail(struct msm_gpu *gpu)
+{
+	struct drm_device *dev = gpu->dev;
+	int ret = 0;
+
+	if (gpu->gpu_reg) {
+		ret = regulator_enable(gpu->gpu_reg);
+		if (ret) {
+			dev_err(dev->dev, "failed to enable 'gpu_reg': %d\n", ret);
+			return ret;
+		}
+	}
+
+	if (gpu->gpu_cx) {
+		ret = regulator_enable(gpu->gpu_cx);
+		if (ret) {
+			dev_err(dev->dev, "failed to enable 'gpu_cx': %d\n", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int disable_pwrrail(struct msm_gpu *gpu)
+{
+	if (gpu->gpu_cx)
+		regulator_disable(gpu->gpu_cx);
+	if (gpu->gpu_reg)
+		regulator_disable(gpu->gpu_reg);
+	return 0;
+}
+
+static int enable_clk(struct msm_gpu *gpu)
+{
+	struct clk *rate_clk = NULL;
+	int i;
+
+	/* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
+	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
+		if (gpu->grp_clks[i]) {
+			clk_prepare(gpu->grp_clks[i]);
+			rate_clk = gpu->grp_clks[i];
+		}
+	}
+
+	if (rate_clk && gpu->fast_rate)
+		clk_set_rate(rate_clk, gpu->fast_rate);
+
+	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
+		if (gpu->grp_clks[i])
+			clk_enable(gpu->grp_clks[i]);
+
+	return 0;
+}
+
+static int disable_clk(struct msm_gpu *gpu)
+{
+	struct clk *rate_clk = NULL;
+	int i;
+
+	/* NOTE: kgsl_pwrctrl_clk() ignores grp_clks[0].. */
+	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--) {
+		if (gpu->grp_clks[i]) {
+			clk_disable(gpu->grp_clks[i]);
+			rate_clk = gpu->grp_clks[i];
+		}
+	}
+
+	if (rate_clk && gpu->slow_rate)
+		clk_set_rate(rate_clk, gpu->slow_rate);
+
+	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i > 0; i--)
+		if (gpu->grp_clks[i])
+			clk_unprepare(gpu->grp_clks[i]);
+
+	return 0;
+}
+
+static int enable_axi(struct msm_gpu *gpu)
+{
+	if (gpu->ebi1_clk)
+		clk_prepare_enable(gpu->ebi1_clk);
+	if (gpu->bus_freq)
+		bs_set(gpu, gpu->bus_freq);
+	return 0;
+}
+
+static int disable_axi(struct msm_gpu *gpu)
+{
+	if (gpu->ebi1_clk)
+		clk_disable_unprepare(gpu->ebi1_clk);
+	if (gpu->bus_freq)
+		bs_set(gpu, 0);
+	return 0;
+}
+
+int msm_gpu_pm_resume(struct msm_gpu *gpu)
+{
+	int ret;
+
+	DBG("%s", gpu->name);
+
+	ret = enable_pwrrail(gpu);
+	if (ret)
+		return ret;
+
+	ret = enable_clk(gpu);
+	if (ret)
+		return ret;
+
+	ret = enable_axi(gpu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int msm_gpu_pm_suspend(struct msm_gpu *gpu)
+{
+	int ret;
+
+	DBG("%s", gpu->name);
+
+	ret = disable_axi(gpu);
+	if (ret)
+		return ret;
+
+	ret = disable_clk(gpu);
+	if (ret)
+		return ret;
+
+	ret = disable_pwrrail(gpu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Hangcheck detection for locked gpu:
+ */
+
+static void recover_worker(struct work_struct *work)
+{
+	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
+	struct drm_device *dev = gpu->dev;
+
+	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
+
+	mutex_lock(&dev->struct_mutex);
+	gpu->funcs->recover(gpu);
+	mutex_unlock(&dev->struct_mutex);
+
+	msm_gpu_retire(gpu);
+}
+
+static void hangcheck_timer_reset(struct msm_gpu *gpu)
+{
+	DBG("%s", gpu->name);
+	mod_timer(&gpu->hangcheck_timer,
+			round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES));
+}
+
+static void hangcheck_handler(unsigned long data)
+{
+	struct msm_gpu *gpu = (struct msm_gpu *)data;
+	uint32_t fence = gpu->funcs->last_fence(gpu);
+
+	if (fence != gpu->hangcheck_fence) {
+		/* some progress has been made.. ya! */
+		gpu->hangcheck_fence = fence;
+	} else if (fence < gpu->submitted_fence) {
+		/* no progress and not done.. hung! */
+		struct msm_drm_private *priv = gpu->dev->dev_private;
+		gpu->hangcheck_fence = fence;
+		queue_work(priv->wq, &gpu->recover_work);
+	}
+
+	/* if still more pending work, reset the hangcheck timer: */
+	if (gpu->submitted_fence > gpu->hangcheck_fence)
+		hangcheck_timer_reset(gpu);
+}
+
+/*
+ * Cmdstream submission/retirement:
+ */
+
+static void retire_worker(struct work_struct *work)
+{
+	struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work);
+	struct drm_device *dev = gpu->dev;
+	uint32_t fence = gpu->funcs->last_fence(gpu);
+
+	mutex_lock(&dev->struct_mutex);
+
+	while (!list_empty(&gpu->active_list)) {
+		struct msm_gem_object *obj;
+
+		obj = list_first_entry(&gpu->active_list,
+				struct msm_gem_object, mm_list);
+
+		if (obj->fence <= fence) {
+			/* move to inactive: */
+			msm_gem_move_to_inactive(&obj->base);
+			msm_gem_put_iova(&obj->base, gpu->id);
+			drm_gem_object_unreference(&obj->base);
+		} else {
+			break;
+		}
+	}
+
+	msm_update_fence(gpu->dev, fence);
+
+	mutex_unlock(&dev->struct_mutex);
+}
+
+/* call from irq handler to schedule work to retire bo's */
+void msm_gpu_retire(struct msm_gpu *gpu)
+{
+	struct msm_drm_private *priv = gpu->dev->dev_private;
+	queue_work(priv->wq, &gpu->retire_work);
+}
+
+/* add bo's to gpu's ring, and kick gpu: */
+int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
+		struct msm_file_private *ctx)
+{
+	struct drm_device *dev = gpu->dev;
+	struct msm_drm_private *priv = dev->dev_private;
+	int i, ret;
+
+	mutex_lock(&dev->struct_mutex);
+
+	submit->fence = ++priv->next_fence;
+
+	gpu->submitted_fence = submit->fence;
+
+	ret = gpu->funcs->submit(gpu, submit, ctx);
+	priv->lastctx = ctx;
+
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct msm_gem_object *msm_obj = submit->bos[i].obj;
+
+		/* can't happen yet.. but when we add 2d support we'll have
+		 * to deal w/ cross-ring synchronization:
+		 */
+		WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu));
+
+		if (!is_active(msm_obj)) {
+			uint32_t iova;
+
+			/* ring takes a reference to the bo and iova: */
+			drm_gem_object_reference(&msm_obj->base);
+			msm_gem_get_iova_locked(&msm_obj->base,
+					submit->gpu->id, &iova);
+		}
+
+		msm_gem_move_to_active(&msm_obj->base, gpu, submit->fence);
+	}
+	hangcheck_timer_reset(gpu);
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
+
+/*
+ * Init/Cleanup:
+ */
+
+static irqreturn_t irq_handler(int irq, void *data)
+{
+	struct msm_gpu *gpu = data;
+	return gpu->funcs->irq(gpu);
+}
+
+static const char *clk_names[] = {
+		"src_clk", "core_clk", "iface_clk", "mem_clk", "mem_iface_clk",
+};
+
+int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
+		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
+		const char *name, const char *ioname, const char *irqname, int ringsz)
+{
+	int i, ret;
+
+	gpu->dev = drm;
+	gpu->funcs = funcs;
+	gpu->name = name;
+
+	INIT_LIST_HEAD(&gpu->active_list);
+	INIT_WORK(&gpu->retire_work, retire_worker);
+	INIT_WORK(&gpu->recover_work, recover_worker);
+
+	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
+			(unsigned long)gpu);
+
+	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));
+
+	/* Map registers: */
+	gpu->mmio = msm_ioremap(pdev, ioname, name);
+	if (IS_ERR(gpu->mmio)) {
+		ret = PTR_ERR(gpu->mmio);
+		goto fail;
+	}
+
+	/* Get Interrupt: */
+	gpu->irq = platform_get_irq_byname(pdev, irqname);
+	if (gpu->irq < 0) {
+		ret = gpu->irq;
+		dev_err(drm->dev, "failed to get irq: %d\n", ret);
+		goto fail;
+	}
+
+	ret = devm_request_irq(&pdev->dev, gpu->irq, irq_handler,
+			IRQF_TRIGGER_HIGH, gpu->name, gpu);
+	if (ret) {
+		dev_err(drm->dev, "failed to request IRQ%u: %d\n", gpu->irq, ret);
+		goto fail;
+	}
+
+	/* Acquire clocks: */
+	for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
+		gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
+		DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
+		if (IS_ERR(gpu->grp_clks[i]))
+			gpu->grp_clks[i] = NULL;
+	}
+
+	gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
+	DBG("ebi1_clk: %p", gpu->ebi1_clk);
+	if (IS_ERR(gpu->ebi1_clk))
+		gpu->ebi1_clk = NULL;
+
+	/* Acquire regulators: */
+	gpu->gpu_reg = devm_regulator_get(&pdev->dev, "vdd");
+	DBG("gpu_reg: %p", gpu->gpu_reg);
+	if (IS_ERR(gpu->gpu_reg))
+		gpu->gpu_reg = NULL;
+
+	gpu->gpu_cx = devm_regulator_get(&pdev->dev, "vddcx");
+	DBG("gpu_cx: %p", gpu->gpu_cx);
+	if (IS_ERR(gpu->gpu_cx))
+		gpu->gpu_cx = NULL;
+
+	/* Setup IOMMU.. eventually we will (I think) do this once per context
+	 * and have separate page tables per context.  For now, to keep things
+	 * simple and to get something working, just use a single address space:
+	 */
+	gpu->iommu = iommu_domain_alloc(&platform_bus_type);
+	if (!gpu->iommu) {
+		dev_err(drm->dev, "failed to allocate IOMMU\n");
+		ret = -ENOMEM;
+		goto fail;
+	}
+	gpu->id = msm_register_iommu(drm, gpu->iommu);
+
+	/* Create ringbuffer: */
+	gpu->rb = msm_ringbuffer_new(gpu, ringsz);
+	if (IS_ERR(gpu->rb)) {
+		ret = PTR_ERR(gpu->rb);
+		gpu->rb = NULL;
+		dev_err(drm->dev, "could not create ringbuffer: %d\n", ret);
+		goto fail;
+	}
+
+	ret = msm_gem_get_iova_locked(gpu->rb->bo, gpu->id, &gpu->rb_iova);
+	if (ret) {
+		gpu->rb_iova = 0;
+		dev_err(drm->dev, "could not map ringbuffer: %d\n", ret);
+		goto fail;
+	}
+
+	bs_init(gpu, pdev);
+
+	return 0;
+
+fail:
+	return ret;
+}
+
+void msm_gpu_cleanup(struct msm_gpu *gpu)
+{
+	DBG("%s", gpu->name);
+
+	WARN_ON(!list_empty(&gpu->active_list));
+
+	bs_fini(gpu);
+
+	if (gpu->rb) {
+		if (gpu->rb_iova)
+			msm_gem_put_iova(gpu->rb->bo, gpu->id);
+		msm_ringbuffer_destroy(gpu->rb);
+	}
+
+	if (gpu->iommu)
+		iommu_domain_free(gpu->iommu);
+}
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
new file mode 100644
index 0000000..8cd829e
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MSM_GPU_H__
+#define __MSM_GPU_H__
+
+#include <linux/clk.h>
+#include <linux/regulator/consumer.h>
+
+#include "msm_drv.h"
+#include "msm_ringbuffer.h"
+
+struct msm_gem_submit;
+
+/* So far, with hardware that I've seen to date, we can have:
+ *  + zero, one, or two z180 2d cores
+ *  + a3xx or a2xx 3d core, which share a common CP (the firmware
+ *    for the CP seems to implement some different PM4 packet types
+ *    but the basics of cmdstream submission are the same)
+ *
+ * Which means that the eventual complete "class" hierarchy, once
+ * support for all past and present hw is in place, becomes:
+ *  + msm_gpu
+ *    + adreno_gpu
+ *      + a3xx_gpu
+ *      + a2xx_gpu
+ *    + z180_gpu
+ */
+struct msm_gpu_funcs {
+	int (*get_param)(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
+	int (*hw_init)(struct msm_gpu *gpu);
+	int (*pm_suspend)(struct msm_gpu *gpu);
+	int (*pm_resume)(struct msm_gpu *gpu);
+	int (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit,
+			struct msm_file_private *ctx);
+	void (*flush)(struct msm_gpu *gpu);
+	void (*idle)(struct msm_gpu *gpu);
+	irqreturn_t (*irq)(struct msm_gpu *irq);
+	uint32_t (*last_fence)(struct msm_gpu *gpu);
+	void (*recover)(struct msm_gpu *gpu);
+	void (*destroy)(struct msm_gpu *gpu);
+#ifdef CONFIG_DEBUG_FS
+	/* show GPU status in debugfs: */
+	void (*show)(struct msm_gpu *gpu, struct seq_file *m);
+#endif
+};
+
+struct msm_gpu {
+	const char *name;
+	struct drm_device *dev;
+	const struct msm_gpu_funcs *funcs;
+
+	struct msm_ringbuffer *rb;
+	uint32_t rb_iova;
+
+	/* list of GEM active objects: */
+	struct list_head active_list;
+
+	uint32_t submitted_fence;
+
+	/* worker for handling active-list retiring: */
+	struct work_struct retire_work;
+
+	void __iomem *mmio;
+	int irq;
+
+	struct iommu_domain *iommu;
+	int id;
+
+	/* Power Control: */
+	struct regulator *gpu_reg, *gpu_cx;
+	struct clk *ebi1_clk, *grp_clks[5];
+	uint32_t fast_rate, slow_rate, bus_freq;
+	uint32_t bsc;
+
+	/* Hang Detction: */
+#define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
+#define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
+	struct timer_list hangcheck_timer;
+	uint32_t hangcheck_fence;
+	struct work_struct recover_work;
+};
+
+static inline void gpu_write(struct msm_gpu *gpu, u32 reg, u32 data)
+{
+	msm_writel(data, gpu->mmio + (reg << 2));
+}
+
+static inline u32 gpu_read(struct msm_gpu *gpu, u32 reg)
+{
+	return msm_readl(gpu->mmio + (reg << 2));
+}
+
+int msm_gpu_pm_suspend(struct msm_gpu *gpu);
+int msm_gpu_pm_resume(struct msm_gpu *gpu);
+
+void msm_gpu_retire(struct msm_gpu *gpu);
+int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
+		struct msm_file_private *ctx);
+
+int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
+		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
+		const char *name, const char *ioname, const char *irqname, int ringsz);
+void msm_gpu_cleanup(struct msm_gpu *gpu);
+
+struct msm_gpu *a3xx_gpu_init(struct drm_device *dev);
+void __init a3xx_register(void);
+void __exit a3xx_unregister(void);
+
+#endif /* __MSM_GPU_H__ */
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
new file mode 100644
index 0000000..8171537d
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_ringbuffer.h"
+#include "msm_gpu.h"
+
+struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
+{
+	struct msm_ringbuffer *ring;
+	int ret;
+
+	size = ALIGN(size, 4);   /* size should be dword aligned */
+
+	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
+	if (!ring) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	ring->gpu = gpu;
+	ring->bo = msm_gem_new(gpu->dev, size, MSM_BO_WC);
+	if (IS_ERR(ring->bo)) {
+		ret = PTR_ERR(ring->bo);
+		ring->bo = NULL;
+		goto fail;
+	}
+
+	ring->start = msm_gem_vaddr_locked(ring->bo);
+	ring->end   = ring->start + (size / 4);
+	ring->cur   = ring->start;
+
+	ring->size = size;
+
+	return ring;
+
+fail:
+	if (ring)
+		msm_ringbuffer_destroy(ring);
+	return ERR_PTR(ret);
+}
+
+void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
+{
+	if (ring->bo)
+		drm_gem_object_unreference(ring->bo);
+	kfree(ring);
+}
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
new file mode 100644
index 0000000..6e0e104
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MSM_RINGBUFFER_H__
+#define __MSM_RINGBUFFER_H__
+
+#include "msm_drv.h"
+
+struct msm_ringbuffer {
+	struct msm_gpu *gpu;
+	int size;
+	struct drm_gem_object *bo;
+	uint32_t *start, *end, *cur;
+};
+
+struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size);
+void msm_ringbuffer_destroy(struct msm_ringbuffer *ring);
+
+/* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */
+
+static inline void
+OUT_RING(struct msm_ringbuffer *ring, uint32_t data)
+{
+	if (ring->cur == ring->end)
+		ring->cur = ring->start;
+	*(ring->cur++) = data;
+}
+
+#endif /* __MSM_RINGBUFFER_H__ */
diff --git a/drivers/gpu/drm/nouveau/core/core/mm.c b/drivers/gpu/drm/nouveau/core/core/mm.c
index d829172..7a4e089 100644
--- a/drivers/gpu/drm/nouveau/core/core/mm.c
+++ b/drivers/gpu/drm/nouveau/core/core/mm.c
@@ -98,6 +98,8 @@
 	u32 splitoff;
 	u32 s, e;
 
+	BUG_ON(!type);
+
 	list_for_each_entry(this, &mm->free, fl_entry) {
 		e = this->offset + this->length;
 		s = this->offset;
@@ -162,6 +164,8 @@
 	struct nouveau_mm_node *prev, *this, *next;
 	u32 mask = align - 1;
 
+	BUG_ON(!type);
+
 	list_for_each_entry_reverse(this, &mm->free, fl_entry) {
 		u32 e = this->offset + this->length;
 		u32 s = this->offset;
diff --git a/drivers/gpu/drm/nouveau/core/core/printk.c b/drivers/gpu/drm/nouveau/core/core/printk.c
index 6161eaf..52fb2aa 100644
--- a/drivers/gpu/drm/nouveau/core/core/printk.c
+++ b/drivers/gpu/drm/nouveau/core/core/printk.c
@@ -27,6 +27,8 @@
 #include <core/subdev.h>
 #include <core/printk.h>
 
+int nv_printk_suspend_level = NV_DBG_DEBUG;
+
 void
 nv_printk_(struct nouveau_object *object, const char *pfx, int level,
 	   const char *fmt, ...)
@@ -72,3 +74,20 @@
 	vprintk(mfmt, args);
 	va_end(args);
 }
+
+#define CONV_LEVEL(x) case NV_DBG_##x: return NV_PRINTK_##x
+
+const char *nv_printk_level_to_pfx(int level)
+{
+	switch (level) {
+	CONV_LEVEL(FATAL);
+	CONV_LEVEL(ERROR);
+	CONV_LEVEL(WARN);
+	CONV_LEVEL(INFO);
+	CONV_LEVEL(DEBUG);
+	CONV_LEVEL(PARANOIA);
+	CONV_LEVEL(TRACE);
+	CONV_LEVEL(SPAM);
+	}
+	return NV_PRINTK_DEBUG;
+}
diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
index 262c9f5..ce860de 100644
--- a/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nvc0.c
@@ -90,6 +90,7 @@
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00008000;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nvc0_bsp_cclass;
 	nv_engine(priv)->sclass = nvc0_bsp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
index c46882c..ba6aeca 100644
--- a/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/bsp/nve0.c
@@ -90,6 +90,7 @@
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00008000;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nve0_bsp_cclass;
 	nv_engine(priv)->sclass = nve0_bsp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/dport.c b/drivers/gpu/drm/nouveau/core/engine/disp/dport.c
index 31cc8fe..054d9cf 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/dport.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/dport.c
@@ -150,7 +150,7 @@
 	if (ret)
 		return ret;
 
-	DBG("status %*ph\n", 6, dp->stat);
+	DBG("status %6ph\n", dp->stat);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c
index 373dbcc5..a19e7d7 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c
@@ -36,6 +36,8 @@
 	if (data && data[0]) {
 		for (i = 0; i < size; i++)
 			nv_wr32(priv, 0x61c440 + soff, (i << 8) | data[i]);
+		for (; i < 0x60; i++)
+			nv_wr32(priv, 0x61c440 + soff, (i << 8));
 		nv_mask(priv, 0x61c448 + soff, 0x80000003, 0x80000003);
 	} else
 	if (data) {
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c
index dc57e24..7176393 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c
@@ -41,6 +41,8 @@
 	if (data && data[0]) {
 		for (i = 0; i < size; i++)
 			nv_wr32(priv, 0x10ec00 + soff, (i << 8) | data[i]);
+		for (; i < 0x60; i++)
+			nv_wr32(priv, 0x10ec00 + soff, (i << 8));
 		nv_mask(priv, 0x10ec10 + soff, 0x80000003, 0x80000003);
 	} else
 	if (data) {
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c
index ab1e918..526b752 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/sornv50.c
@@ -47,14 +47,8 @@
 nv50_sor_mthd(struct nouveau_object *object, u32 mthd, void *args, u32 size)
 {
 	struct nv50_disp_priv *priv = (void *)object->engine;
-	struct nouveau_bios *bios = nouveau_bios(priv);
-	const u16 type = (mthd & NV50_DISP_SOR_MTHD_TYPE) >> 12;
 	const u8  head = (mthd & NV50_DISP_SOR_MTHD_HEAD) >> 3;
-	const u8  link = (mthd & NV50_DISP_SOR_MTHD_LINK) >> 2;
 	const u8    or = (mthd & NV50_DISP_SOR_MTHD_OR);
-	const u16 mask = (0x0100 << head) | (0x0040 << link) | (0x0001 << or);
-	struct dcb_output outp;
-	u8  ver, hdr;
 	u32 data;
 	int ret = -EINVAL;
 
@@ -62,8 +56,6 @@
 		return -EINVAL;
 	data = *(u32 *)args;
 
-	if (type && !dcb_outp_match(bios, type, mask, &ver, &hdr, &outp))
-		return -ENODEV;
 
 	switch (mthd & ~0x3f) {
 	case NV50_DISP_SOR_PWR:
diff --git a/drivers/gpu/drm/nouveau/core/engine/falcon.c b/drivers/gpu/drm/nouveau/core/engine/falcon.c
index 3c7a31f..e03fc8e 100644
--- a/drivers/gpu/drm/nouveau/core/engine/falcon.c
+++ b/drivers/gpu/drm/nouveau/core/engine/falcon.c
@@ -23,6 +23,25 @@
 #include <engine/falcon.h>
 #include <subdev/timer.h>
 
+void
+nouveau_falcon_intr(struct nouveau_subdev *subdev)
+{
+	struct nouveau_falcon *falcon = (void *)subdev;
+	u32 dispatch = nv_ro32(falcon, 0x01c);
+	u32 intr = nv_ro32(falcon, 0x008) & dispatch & ~(dispatch >> 16);
+
+	if (intr & 0x00000010) {
+		nv_debug(falcon, "ucode halted\n");
+		nv_wo32(falcon, 0x004, 0x00000010);
+		intr &= ~0x00000010;
+	}
+
+	if (intr)  {
+		nv_error(falcon, "unhandled intr 0x%08x\n", intr);
+		nv_wo32(falcon, 0x004, intr);
+	}
+}
+
 u32
 _nouveau_falcon_rd32(struct nouveau_object *object, u64 addr)
 {
diff --git a/drivers/gpu/drm/nouveau/core/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/core/engine/mpeg/nv31.c
index 49ecbb8..c190043 100644
--- a/drivers/gpu/drm/nouveau/core/engine/mpeg/nv31.c
+++ b/drivers/gpu/drm/nouveau/core/engine/mpeg/nv31.c
@@ -265,8 +265,8 @@
 int
 nv31_mpeg_init(struct nouveau_object *object)
 {
-	struct nouveau_engine *engine = nv_engine(object->engine);
-	struct nv31_mpeg_priv *priv = (void *)engine;
+	struct nouveau_engine *engine = nv_engine(object);
+	struct nv31_mpeg_priv *priv = (void *)object;
 	struct nouveau_fb *pfb = nouveau_fb(object);
 	int ret, i;
 
@@ -284,7 +284,10 @@
 	/* PMPEG init */
 	nv_wr32(priv, 0x00b32c, 0x00000000);
 	nv_wr32(priv, 0x00b314, 0x00000100);
-	nv_wr32(priv, 0x00b220, nv44_graph_class(priv) ? 0x00000044 : 0x00000031);
+	if (nv_device(priv)->chipset >= 0x40 && nv44_graph_class(priv))
+		nv_wr32(priv, 0x00b220, 0x00000044);
+	else
+		nv_wr32(priv, 0x00b220, 0x00000031);
 	nv_wr32(priv, 0x00b300, 0x02001ec1);
 	nv_mask(priv, 0x00b32c, 0x00000001, 0x00000001);
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/mpeg/nv40.c b/drivers/gpu/drm/nouveau/core/engine/mpeg/nv40.c
index f7c581a..dd61960 100644
--- a/drivers/gpu/drm/nouveau/core/engine/mpeg/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/engine/mpeg/nv40.c
@@ -61,6 +61,7 @@
 	if (ret)
 		return ret;
 
+	nv_wo32(&chan->base.base, 0x78, 0x02001ec1);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
index 98072c1..73719aa 100644
--- a/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/ppp/nvc0.c
@@ -90,6 +90,7 @@
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00000002;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nvc0_ppp_cclass;
 	nv_engine(priv)->sclass = nvc0_ppp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
index 1879229..ac1f62a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/vp/nvc0.c
@@ -90,6 +90,7 @@
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00020000;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nvc0_vp_cclass;
 	nv_engine(priv)->sclass = nvc0_vp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
index d28ecbf..d4c3108 100644
--- a/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/vp/nve0.c
@@ -90,6 +90,7 @@
 		return ret;
 
 	nv_subdev(priv)->unit = 0x00020000;
+	nv_subdev(priv)->intr = nouveau_falcon_intr;
 	nv_engine(priv)->cclass = &nve0_vp_cclass;
 	nv_engine(priv)->sclass = nve0_vp_sclass;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/xtensa.c b/drivers/gpu/drm/nouveau/core/engine/xtensa.c
index 0639bc5..5f6ede7 100644
--- a/drivers/gpu/drm/nouveau/core/engine/xtensa.c
+++ b/drivers/gpu/drm/nouveau/core/engine/xtensa.c
@@ -118,7 +118,13 @@
 			return ret;
 		}
 
-		ret = nouveau_gpuobj_new(object, NULL, fw->size, 0x1000, 0,
+		if (fw->size > 0x40000) {
+			nv_warn(xtensa, "firmware %s too large\n", name);
+			release_firmware(fw);
+			return -EINVAL;
+		}
+
+		ret = nouveau_gpuobj_new(object, NULL, 0x40000, 0x1000, 0,
 					 &xtensa->gpu_fw);
 		if (ret) {
 			release_firmware(fw);
diff --git a/drivers/gpu/drm/nouveau/core/include/core/printk.h b/drivers/gpu/drm/nouveau/core/include/core/printk.h
index febed2e..d87836e 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/printk.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/printk.h
@@ -15,6 +15,12 @@
 #define NV_PRINTK_TRACE    KERN_DEBUG
 #define NV_PRINTK_SPAM     KERN_DEBUG
 
+extern int nv_printk_suspend_level;
+
+#define NV_DBG_SUSPEND (nv_printk_suspend_level)
+#define NV_PRINTK_SUSPEND  (nv_printk_level_to_pfx(nv_printk_suspend_level))
+
+const char *nv_printk_level_to_pfx(int level);
 void __printf(4, 5)
 nv_printk_(struct nouveau_object *, const char *, int, const char *, ...);
 
@@ -31,6 +37,13 @@
 #define nv_trace(o,f,a...) nv_printk((o), TRACE, f, ##a)
 #define nv_spam(o,f,a...) nv_printk((o), SPAM, f, ##a)
 
+#define nv_suspend(o,f,a...) nv_printk((o), SUSPEND, f, ##a)
+
+static inline void nv_suspend_set_printk_level(int level)
+{
+	nv_printk_suspend_level = level;
+}
+
 #define nv_assert(f,a...) do {                                                 \
 	if (NV_DBG_FATAL <= CONFIG_NOUVEAU_DEBUG)                              \
 		nv_printk_(NULL, NV_PRINTK_FATAL, NV_DBG_FATAL, f "\n", ##a);  \
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/falcon.h b/drivers/gpu/drm/nouveau/core/include/engine/falcon.h
index 1edec38..181aa7d 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/falcon.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/falcon.h
@@ -72,6 +72,8 @@
 			   struct nouveau_oclass *, u32, bool, const char *,
 			   const char *, int, void **);
 
+void nouveau_falcon_intr(struct nouveau_subdev *subdev);
+
 #define _nouveau_falcon_dtor _nouveau_engine_dtor
 int  _nouveau_falcon_init(struct nouveau_object *);
 int  _nouveau_falcon_fini(struct nouveau_object *, bool);
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
index d550226..9d2cd20 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/mc.h
@@ -20,8 +20,8 @@
 	return (void *)nv_device(obj)->subdev[NVDEV_SUBDEV_MC];
 }
 
-#define nouveau_mc_create(p,e,o,d)                                             \
-	nouveau_mc_create_((p), (e), (o), sizeof(**d), (void **)d)
+#define nouveau_mc_create(p,e,o,m,d)                                           \
+	nouveau_mc_create_((p), (e), (o), (m), sizeof(**d), (void **)d)
 #define nouveau_mc_destroy(p) ({                                               \
 	struct nouveau_mc *pmc = (p); _nouveau_mc_dtor(nv_object(pmc));        \
 })
@@ -33,7 +33,8 @@
 })
 
 int  nouveau_mc_create_(struct nouveau_object *, struct nouveau_object *,
-			struct nouveau_oclass *, int, void **);
+			struct nouveau_oclass *, const struct nouveau_mc_intr *,
+			int, void **);
 void _nouveau_mc_dtor(struct nouveau_object *);
 int  _nouveau_mc_init(struct nouveau_object *);
 int  _nouveau_mc_fini(struct nouveau_object *, bool);
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/vm.h b/drivers/gpu/drm/nouveau/core/include/subdev/vm.h
index f2e87b1..fcf57fa 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/vm.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/vm.h
@@ -55,7 +55,7 @@
 struct nouveau_vm {
 	struct nouveau_vmmgr *vmm;
 	struct nouveau_mm mm;
-	int refcount;
+	struct kref refcount;
 
 	struct list_head pgd_list;
 	atomic_t engref[NVDEV_SUBDEV_NR];
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
index 0687e64..2e11ea0 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
@@ -2165,7 +2165,7 @@
 	u16 data;
 
 	if (execute)
-		nv_info(bios, "running init tables\n");
+		nv_suspend(bios, "running init tables\n");
 	while (!ret && (data = (init_script(bios, ++i)))) {
 		struct nvbios_init init = {
 			.subdev = subdev,
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
index 6c974dd..db9d6dd 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
@@ -81,7 +81,7 @@
 void nv46_fb_tile_init(struct nouveau_fb *, int i, u32 addr, u32 size,
 		       u32 pitch, u32 flags, struct nouveau_fb_tile *);
 
-void nv50_ram_put(struct nouveau_fb *, struct nouveau_mem **);
+void __nv50_ram_put(struct nouveau_fb *, struct nouveau_mem *);
 extern int nv50_fb_memtype[0x80];
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv49.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv49.c
index 19e3a9a..ab7ef0a 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv49.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv49.c
@@ -40,15 +40,15 @@
 		return ret;
 
 	switch (pfb914 & 0x00000003) {
-	case 0x00000000: pfb->ram->type = NV_MEM_TYPE_DDR1; break;
-	case 0x00000001: pfb->ram->type = NV_MEM_TYPE_DDR2; break;
-	case 0x00000002: pfb->ram->type = NV_MEM_TYPE_GDDR3; break;
+	case 0x00000000: ram->type = NV_MEM_TYPE_DDR1; break;
+	case 0x00000001: ram->type = NV_MEM_TYPE_DDR2; break;
+	case 0x00000002: ram->type = NV_MEM_TYPE_GDDR3; break;
 	case 0x00000003: break;
 	}
 
-	pfb->ram->size  =  nv_rd32(pfb, 0x10020c) & 0xff000000;
-	pfb->ram->parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
-	pfb->ram->tags  =  nv_rd32(pfb, 0x100320);
+	ram->size  =  nv_rd32(pfb, 0x10020c) & 0xff000000;
+	ram->parts = (nv_rd32(pfb, 0x100200) & 0x00000003) + 1;
+	ram->tags  =  nv_rd32(pfb, 0x100320);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv4e.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv4e.c
index 7192aa6..63a6aab 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv4e.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv4e.c
@@ -38,8 +38,8 @@
 	if (ret)
 		return ret;
 
-	pfb->ram->size = nv_rd32(pfb, 0x10020c) & 0xff000000;
-	pfb->ram->type = NV_MEM_TYPE_STOLEN;
+	ram->size = nv_rd32(pfb, 0x10020c) & 0xff000000;
+	ram->type = NV_MEM_TYPE_STOLEN;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c
index af5aa7e..903baff 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c
@@ -27,17 +27,10 @@
 #include "priv.h"
 
 void
-nv50_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
+__nv50_ram_put(struct nouveau_fb *pfb, struct nouveau_mem *mem)
 {
 	struct nouveau_mm_node *this;
-	struct nouveau_mem *mem;
 
-	mem = *pmem;
-	*pmem = NULL;
-	if (unlikely(mem == NULL))
-		return;
-
-	mutex_lock(&pfb->base.mutex);
 	while (!list_empty(&mem->regions)) {
 		this = list_first_entry(&mem->regions, typeof(*this), rl_entry);
 
@@ -46,6 +39,19 @@
 	}
 
 	nouveau_mm_free(&pfb->tags, &mem->tag);
+}
+
+void
+nv50_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
+{
+	struct nouveau_mem *mem = *pmem;
+
+	*pmem = NULL;
+	if (unlikely(mem == NULL))
+		return;
+
+	mutex_lock(&pfb->base.mutex);
+	__nv50_ram_put(pfb, mem);
 	mutex_unlock(&pfb->base.mutex);
 
 	kfree(mem);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
index 9c3634a..cf97c4d 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
@@ -33,11 +33,19 @@
 nvc0_ram_put(struct nouveau_fb *pfb, struct nouveau_mem **pmem)
 {
 	struct nouveau_ltcg *ltcg = nouveau_ltcg(pfb);
+	struct nouveau_mem *mem = *pmem;
 
-	if ((*pmem)->tag)
-		ltcg->tags_free(ltcg, &(*pmem)->tag);
+	*pmem = NULL;
+	if (unlikely(mem == NULL))
+		return;
 
-	nv50_ram_put(pfb, pmem);
+	mutex_lock(&pfb->base.mutex);
+	if (mem->tag)
+		ltcg->tags_free(ltcg, &mem->tag);
+	__nv50_ram_put(pfb, mem);
+	mutex_unlock(&pfb->base.mutex);
+
+	kfree(mem);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c
index bf489dc..c4c1d41 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c
@@ -103,7 +103,7 @@
 	int i;
 
 	intr0 = nv_rd32(priv, 0xe054) & nv_rd32(priv, 0xe050);
-	if (nv_device(priv)->chipset >= 0x90)
+	if (nv_device(priv)->chipset > 0x92)
 		intr1 = nv_rd32(priv, 0xe074) & nv_rd32(priv, 0xe070);
 
 	hi = (intr0 & 0x0000ffff) | (intr1 << 16);
@@ -115,7 +115,7 @@
 	}
 
 	nv_wr32(priv, 0xe054, intr0);
-	if (nv_device(priv)->chipset >= 0x90)
+	if (nv_device(priv)->chipset > 0x92)
 		nv_wr32(priv, 0xe074, intr1);
 }
 
@@ -146,7 +146,7 @@
 	int ret;
 
 	ret = nouveau_gpio_create(parent, engine, oclass,
-				  nv_device(parent)->chipset >= 0x90 ? 32 : 16,
+				  nv_device(parent)->chipset > 0x92 ? 32 : 16,
 				  &priv);
 	*pobject = nv_object(priv);
 	if (ret)
@@ -182,7 +182,7 @@
 	/* disable, and ack any pending gpio interrupts */
 	nv_wr32(priv, 0xe050, 0x00000000);
 	nv_wr32(priv, 0xe054, 0xffffffff);
-	if (nv_device(priv)->chipset >= 0x90) {
+	if (nv_device(priv)->chipset > 0x92) {
 		nv_wr32(priv, 0xe070, 0x00000000);
 		nv_wr32(priv, 0xe074, 0xffffffff);
 	}
@@ -195,7 +195,7 @@
 {
 	struct nv50_gpio_priv *priv = (void *)object;
 	nv_wr32(priv, 0xe050, 0x00000000);
-	if (nv_device(priv)->chipset >= 0x90)
+	if (nv_device(priv)->chipset > 0x92)
 		nv_wr32(priv, 0xe070, 0x00000000);
 	return nouveau_gpio_fini(&priv->base, suspend);
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c
index bcca883..cce65cc 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltcg/nvc0.c
@@ -30,8 +30,9 @@
 	struct nouveau_ltcg base;
 	u32 part_nr;
 	u32 subp_nr;
-	struct nouveau_mm tags;
 	u32 num_tags;
+	u32 tag_base;
+	struct nouveau_mm tags;
 	struct nouveau_mm_node *tag_ram;
 };
 
@@ -117,10 +118,6 @@
 	u32 tag_size, tag_margin, tag_align;
 	int ret;
 
-	nv_wr32(priv, 0x17e8d8, priv->part_nr);
-	if (nv_device(pfb)->card_type >= NV_E0)
-		nv_wr32(priv, 0x17e000, priv->part_nr);
-
 	/* tags for 1/4 of VRAM should be enough (8192/4 per GiB of VRAM) */
 	priv->num_tags = (pfb->ram->size >> 17) / 4;
 	if (priv->num_tags > (1 << 17))
@@ -142,7 +139,7 @@
 	tag_size += tag_align;
 	tag_size  = (tag_size + 0xfff) >> 12; /* round up */
 
-	ret = nouveau_mm_tail(&pfb->vram, 0, tag_size, tag_size, 1,
+	ret = nouveau_mm_tail(&pfb->vram, 1, tag_size, tag_size, 1,
 	                      &priv->tag_ram);
 	if (ret) {
 		priv->num_tags = 0;
@@ -152,7 +149,7 @@
 		tag_base += tag_align - 1;
 		ret = do_div(tag_base, tag_align);
 
-		nv_wr32(priv, 0x17e8d4, tag_base);
+		priv->tag_base = tag_base;
 	}
 	ret = nouveau_mm_init(&priv->tags, 0, priv->num_tags, 1);
 
@@ -182,8 +179,6 @@
 	}
 	priv->subp_nr = nv_rd32(priv, 0x17e8dc) >> 28;
 
-	nv_mask(priv, 0x17e820, 0x00100000, 0x00000000); /* INTR_EN &= ~0x10 */
-
 	ret = nvc0_ltcg_init_tag_ram(pfb, priv);
 	if (ret)
 		return ret;
@@ -209,13 +204,32 @@
 	nouveau_ltcg_destroy(ltcg);
 }
 
+static int
+nvc0_ltcg_init(struct nouveau_object *object)
+{
+	struct nouveau_ltcg *ltcg = (struct nouveau_ltcg *)object;
+	struct nvc0_ltcg_priv *priv = (struct nvc0_ltcg_priv *)ltcg;
+	int ret;
+
+	ret = nouveau_ltcg_init(ltcg);
+	if (ret)
+		return ret;
+
+	nv_mask(priv, 0x17e820, 0x00100000, 0x00000000); /* INTR_EN &= ~0x10 */
+	nv_wr32(priv, 0x17e8d8, priv->part_nr);
+	if (nv_device(ltcg)->card_type >= NV_E0)
+		nv_wr32(priv, 0x17e000, priv->part_nr);
+	nv_wr32(priv, 0x17e8d4, priv->tag_base);
+	return 0;
+}
+
 struct nouveau_oclass
 nvc0_ltcg_oclass = {
 	.handle = NV_SUBDEV(LTCG, 0xc0),
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nvc0_ltcg_ctor,
 		.dtor = nvc0_ltcg_dtor,
-		.init = _nouveau_ltcg_init,
+		.init = nvc0_ltcg_init,
 		.fini = _nouveau_ltcg_fini,
 	},
 };
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
index 1c0330b..20f9a53 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/base.c
@@ -23,16 +23,20 @@
  */
 
 #include <subdev/mc.h>
+#include <linux/pm_runtime.h>
 
 static irqreturn_t
 nouveau_mc_intr(int irq, void *arg)
 {
 	struct nouveau_mc *pmc = arg;
 	const struct nouveau_mc_intr *map = pmc->intr_map;
+	struct nouveau_device *device = nv_device(pmc);
 	struct nouveau_subdev *unit;
 	u32 stat, intr;
 
 	intr = stat = nv_rd32(pmc, 0x000100);
+	if (intr == 0xffffffff)
+		return IRQ_NONE;
 	while (stat && map->stat) {
 		if (stat & map->stat) {
 			unit = nouveau_subdev(pmc, map->unit);
@@ -47,6 +51,8 @@
 		nv_error(pmc, "unknown intr 0x%08x\n", stat);
 	}
 
+	if (stat == IRQ_HANDLED)
+		pm_runtime_mark_last_busy(&device->pdev->dev);
 	return stat ? IRQ_HANDLED : IRQ_NONE;
 }
 
@@ -80,7 +86,9 @@
 
 int
 nouveau_mc_create_(struct nouveau_object *parent, struct nouveau_object *engine,
-		   struct nouveau_oclass *oclass, int length, void **pobject)
+		   struct nouveau_oclass *oclass,
+		   const struct nouveau_mc_intr *intr_map,
+		   int length, void **pobject)
 {
 	struct nouveau_device *device = nv_device(parent);
 	struct nouveau_mc *pmc;
@@ -92,6 +100,8 @@
 	if (ret)
 		return ret;
 
+	pmc->intr_map = intr_map;
+
 	ret = request_irq(device->pdev->irq, nouveau_mc_intr,
 			  IRQF_SHARED, "nouveau", pmc);
 	if (ret < 0)
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c
index 8c76971..64aa4ed 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv04.c
@@ -50,12 +50,11 @@
 	struct nv04_mc_priv *priv;
 	int ret;
 
-	ret = nouveau_mc_create(parent, engine, oclass, &priv);
+	ret = nouveau_mc_create(parent, engine, oclass, nv04_mc_intr, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
-	priv->base.intr_map = nv04_mc_intr;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c
index 5191937..d989178 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv44.c
@@ -36,12 +36,11 @@
 	struct nv44_mc_priv *priv;
 	int ret;
 
-	ret = nouveau_mc_create(parent, engine, oclass, &priv);
+	ret = nouveau_mc_create(parent, engine, oclass, nv04_mc_intr, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
-	priv->base.intr_map = nv04_mc_intr;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
index 0cb322a..2b1afe2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv50.c
@@ -41,7 +41,7 @@
 	{ 0x04000000, NVDEV_ENGINE_DISP },
 	{ 0x10000000, NVDEV_SUBDEV_BUS },
 	{ 0x80000000, NVDEV_ENGINE_SW },
-	{ 0x0000d101, NVDEV_SUBDEV_FB },
+	{ 0x0002d101, NVDEV_SUBDEV_FB },
 	{},
 };
 
@@ -53,12 +53,11 @@
 	struct nv50_mc_priv *priv;
 	int ret;
 
-	ret = nouveau_mc_create(parent, engine, oclass, &priv);
+	ret = nouveau_mc_create(parent, engine, oclass, nv50_mc_intr, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
-	priv->base.intr_map = nv50_mc_intr;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c
index e82fd21..0d57b4d 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nv98.c
@@ -54,12 +54,11 @@
 	struct nv98_mc_priv *priv;
 	int ret;
 
-	ret = nouveau_mc_create(parent, engine, oclass, &priv);
+	ret = nouveau_mc_create(parent, engine, oclass, nv98_mc_intr, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
-	priv->base.intr_map = nv98_mc_intr;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c
index c5da3ba..104175c 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mc/nvc0.c
@@ -57,12 +57,11 @@
 	struct nvc0_mc_priv *priv;
 	int ret;
 
-	ret = nouveau_mc_create(parent, engine, oclass, &priv);
+	ret = nouveau_mc_create(parent, engine, oclass, nvc0_mc_intr, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
 
-	priv->base.intr_map = nvc0_mc_intr;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
index 67fcb6c..ef3133e 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
@@ -361,7 +361,7 @@
 
 	INIT_LIST_HEAD(&vm->pgd_list);
 	vm->vmm = vmm;
-	vm->refcount = 1;
+	kref_init(&vm->refcount);
 	vm->fpde = offset >> (vmm->pgt_bits + 12);
 	vm->lpde = (offset + length - 1) >> (vmm->pgt_bits + 12);
 
@@ -441,8 +441,9 @@
 }
 
 static void
-nouveau_vm_del(struct nouveau_vm *vm)
+nouveau_vm_del(struct kref *kref)
 {
+	struct nouveau_vm *vm = container_of(kref, typeof(*vm), refcount);
 	struct nouveau_vm_pgd *vpgd, *tmp;
 
 	list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) {
@@ -458,27 +459,19 @@
 nouveau_vm_ref(struct nouveau_vm *ref, struct nouveau_vm **ptr,
 	       struct nouveau_gpuobj *pgd)
 {
-	struct nouveau_vm *vm;
-	int ret;
-
-	vm = ref;
-	if (vm) {
-		ret = nouveau_vm_link(vm, pgd);
+	if (ref) {
+		int ret = nouveau_vm_link(ref, pgd);
 		if (ret)
 			return ret;
 
-		vm->refcount++;
+		kref_get(&ref->refcount);
 	}
 
-	vm = *ptr;
+	if (*ptr) {
+		nouveau_vm_unlink(*ptr, pgd);
+		kref_put(&(*ptr)->refcount, nouveau_vm_del);
+	}
+
 	*ptr = ref;
-
-	if (vm) {
-		nouveau_vm_unlink(vm, pgd);
-
-		if (--vm->refcount == 0)
-			nouveau_vm_del(vm);
-	}
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index 0782bd2..d4fbf11 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -22,6 +22,7 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+#include <linux/pm_runtime.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
@@ -606,6 +607,24 @@
 	regp->ramdac_a34 = 0x1;
 }
 
+static int
+nv_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
+{
+	struct nv04_display *disp = nv04_display(crtc->dev);
+	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	int ret;
+
+	ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
+	if (ret == 0) {
+		if (disp->image[nv_crtc->index])
+			nouveau_bo_unpin(disp->image[nv_crtc->index]);
+		nouveau_bo_ref(nvfb->nvbo, &disp->image[nv_crtc->index]);
+	}
+
+	return ret;
+}
+
 /**
  * Sets up registers for the given mode/adjusted_mode pair.
  *
@@ -622,10 +641,15 @@
 	struct drm_device *dev = crtc->dev;
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nouveau_drm *drm = nouveau_drm(dev);
+	int ret;
 
 	NV_DEBUG(drm, "CTRC mode on CRTC %d:\n", nv_crtc->index);
 	drm_mode_debug_printmodeline(adjusted_mode);
 
+	ret = nv_crtc_swap_fbs(crtc, old_fb);
+	if (ret)
+		return ret;
+
 	/* unlock must come after turning off FP_TG_CONTROL in output_prepare */
 	nv_lock_vga_crtc_shadow(dev, nv_crtc->index, -1);
 
@@ -722,6 +746,7 @@
 
 static void nv_crtc_destroy(struct drm_crtc *crtc)
 {
+	struct nv04_display *disp = nv04_display(crtc->dev);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 
 	if (!nv_crtc)
@@ -729,6 +754,10 @@
 
 	drm_crtc_cleanup(crtc);
 
+	if (disp->image[nv_crtc->index])
+		nouveau_bo_unpin(disp->image[nv_crtc->index]);
+	nouveau_bo_ref(NULL, &disp->image[nv_crtc->index]);
+
 	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
 	nouveau_bo_unpin(nv_crtc->cursor.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
@@ -754,6 +783,16 @@
 }
 
 static void
+nv_crtc_disable(struct drm_crtc *crtc)
+{
+	struct nv04_display *disp = nv04_display(crtc->dev);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	if (disp->image[nv_crtc->index])
+		nouveau_bo_unpin(disp->image[nv_crtc->index]);
+	nouveau_bo_ref(NULL, &disp->image[nv_crtc->index]);
+}
+
+static void
 nv_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, uint32_t start,
 		  uint32_t size)
 {
@@ -791,7 +830,6 @@
 	struct drm_framebuffer *drm_fb;
 	struct nouveau_framebuffer *fb;
 	int arb_burst, arb_lwm;
-	int ret;
 
 	NV_DEBUG(drm, "index %d\n", nv_crtc->index);
 
@@ -801,10 +839,8 @@
 		return 0;
 	}
 
-
 	/* If atomic, we want to switch to the fb we were passed, so
-	 * now we update pointers to do that.  (We don't pin; just
-	 * assume we're already pinned and update the base address.)
+	 * now we update pointers to do that.
 	 */
 	if (atomic) {
 		drm_fb = passed_fb;
@@ -812,17 +848,6 @@
 	} else {
 		drm_fb = crtc->fb;
 		fb = nouveau_framebuffer(crtc->fb);
-		/* If not atomic, we can go ahead and pin, and unpin the
-		 * old fb we were passed.
-		 */
-		ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM);
-		if (ret)
-			return ret;
-
-		if (passed_fb) {
-			struct nouveau_framebuffer *ofb = nouveau_framebuffer(passed_fb);
-			nouveau_bo_unpin(ofb->nvbo);
-		}
 	}
 
 	nv_crtc->fb.offset = fb->nvbo->bo.offset;
@@ -877,6 +902,9 @@
 nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 			struct drm_framebuffer *old_fb)
 {
+	int ret = nv_crtc_swap_fbs(crtc, old_fb);
+	if (ret)
+		return ret;
 	return nv04_crtc_do_mode_set_base(crtc, old_fb, x, y, false);
 }
 
@@ -1007,13 +1035,59 @@
 	return 0;
 }
 
+int
+nouveau_crtc_set_config(struct drm_mode_set *set)
+{
+	struct drm_device *dev;
+	struct nouveau_drm *drm;
+	int ret;
+	struct drm_crtc *crtc;
+	bool active = false;
+	if (!set || !set->crtc)
+		return -EINVAL;
+
+	dev = set->crtc->dev;
+
+	/* get a pm reference here */
+	ret = pm_runtime_get_sync(dev->dev);
+	if (ret < 0)
+		return ret;
+
+	ret = drm_crtc_helper_set_config(set);
+
+	drm = nouveau_drm(dev);
+
+	/* if we get here with no crtcs active then we can drop a reference */
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		if (crtc->enabled)
+			active = true;
+	}
+
+	pm_runtime_mark_last_busy(dev->dev);
+	/* if we have active crtcs and we don't have a power ref,
+	   take the current one */
+	if (active && !drm->have_disp_power_ref) {
+		drm->have_disp_power_ref = true;
+		return ret;
+	}
+	/* if we have no active crtcs, then drop the power ref
+	   we got before */
+	if (!active && drm->have_disp_power_ref) {
+		pm_runtime_put_autosuspend(dev->dev);
+		drm->have_disp_power_ref = false;
+	}
+	/* drop the power reference we got coming in here */
+	pm_runtime_put_autosuspend(dev->dev);
+	return ret;
+}
+
 static const struct drm_crtc_funcs nv04_crtc_funcs = {
 	.save = nv_crtc_save,
 	.restore = nv_crtc_restore,
 	.cursor_set = nv04_crtc_cursor_set,
 	.cursor_move = nv04_crtc_cursor_move,
 	.gamma_set = nv_crtc_gamma_set,
-	.set_config = drm_crtc_helper_set_config,
+	.set_config = nouveau_crtc_set_config,
 	.page_flip = nouveau_crtc_page_flip,
 	.destroy = nv_crtc_destroy,
 };
@@ -1027,6 +1101,7 @@
 	.mode_set_base = nv04_crtc_mode_set_base,
 	.mode_set_base_atomic = nv04_crtc_mode_set_base_atomic,
 	.load_lut = nv_crtc_gamma_load,
+	.disable = nv_crtc_disable,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h
index a0a031d..9928187 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h
@@ -81,6 +81,7 @@
 	uint32_t saved_vga_font[4][16384];
 	uint32_t dac_users[4];
 	struct nouveau_object *core;
+	struct nouveau_bo *image[2];
 };
 
 static inline struct nv04_display *
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index d97f200..dd7d2e1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -25,8 +25,27 @@
 #define NOUVEAU_DSM_POWER_SPEED 0x01
 #define NOUVEAU_DSM_POWER_STAMINA 0x02
 
-#define NOUVEAU_DSM_OPTIMUS_FN 0x1A
-#define NOUVEAU_DSM_OPTIMUS_ARGS 0x03000001
+#define NOUVEAU_DSM_OPTIMUS_CAPS 0x1A
+#define NOUVEAU_DSM_OPTIMUS_FLAGS 0x1B
+
+#define NOUVEAU_DSM_OPTIMUS_POWERDOWN_PS3 (3 << 24)
+#define NOUVEAU_DSM_OPTIMUS_NO_POWERDOWN_PS3 (2 << 24)
+#define NOUVEAU_DSM_OPTIMUS_FLAGS_CHANGED (1)
+
+#define NOUVEAU_DSM_OPTIMUS_SET_POWERDOWN (NOUVEAU_DSM_OPTIMUS_POWERDOWN_PS3 | NOUVEAU_DSM_OPTIMUS_FLAGS_CHANGED)
+
+/* result of the optimus caps function */
+#define OPTIMUS_ENABLED (1 << 0)
+#define OPTIMUS_STATUS_MASK (3 << 3)
+#define OPTIMUS_STATUS_OFF  (0 << 3)
+#define OPTIMUS_STATUS_ON_ENABLED  (1 << 3)
+#define OPTIMUS_STATUS_PWR_STABLE  (3 << 3)
+#define OPTIMUS_DISPLAY_HOTPLUG (1 << 6)
+#define OPTIMUS_CAPS_MASK (7 << 24)
+#define OPTIMUS_DYNAMIC_PWR_CAP (1 << 24)
+
+#define OPTIMUS_AUDIO_CAPS_MASK (3 << 27)
+#define OPTIMUS_HDA_CODEC_MASK (2 << 27) /* hda bios control */
 
 static struct nouveau_dsm_priv {
 	bool dsm_detected;
@@ -251,9 +270,18 @@
 		retval |= NOUVEAU_DSM_HAS_MUX;
 
 	if (nouveau_test_dsm(dhandle, nouveau_optimus_dsm,
-		NOUVEAU_DSM_OPTIMUS_FN))
+		NOUVEAU_DSM_OPTIMUS_CAPS))
 		retval |= NOUVEAU_DSM_HAS_OPT;
 
+	if (retval & NOUVEAU_DSM_HAS_OPT) {
+		uint32_t result;
+		nouveau_optimus_dsm(dhandle, NOUVEAU_DSM_OPTIMUS_CAPS, 0,
+				    &result);
+		dev_info(&pdev->dev, "optimus capabilities: %s, status %s%s\n",
+			 (result & OPTIMUS_ENABLED) ? "enabled" : "disabled",
+			 (result & OPTIMUS_DYNAMIC_PWR_CAP) ? "dynamic power, " : "",
+			 (result & OPTIMUS_HDA_CODEC_MASK) ? "hda bios codec supported" : "");
+	}
 	if (retval)
 		nouveau_dsm_priv.dhandle = dhandle;
 
@@ -328,8 +356,12 @@
 	if (!nouveau_dsm_priv.optimus_detected)
 		return;
 
-	nouveau_optimus_dsm(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_OPTIMUS_FN,
-		NOUVEAU_DSM_OPTIMUS_ARGS, &result);
+	nouveau_optimus_dsm(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_OPTIMUS_FLAGS,
+			    0x3, &result);
+
+	nouveau_optimus_dsm(nouveau_dsm_priv.dhandle, NOUVEAU_DSM_OPTIMUS_CAPS,
+		NOUVEAU_DSM_OPTIMUS_SET_POWERDOWN, &result);
+
 }
 
 void nouveau_unregister_dsm_handler(void)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 4b1afb1..755c38d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -148,6 +148,7 @@
 
 	if (unlikely(nvbo->gem))
 		DRM_ERROR("bo %p still attached to GEM object\n", bo);
+	WARN_ON(nvbo->pin_refcnt > 0);
 	nv10_bo_put_tile_region(dev, nvbo->tile, NULL);
 	kfree(nvbo);
 }
@@ -197,6 +198,17 @@
 	size_t acc_size;
 	int ret;
 	int type = ttm_bo_type_device;
+	int lpg_shift = 12;
+	int max_size;
+
+	if (drm->client.base.vm)
+		lpg_shift = drm->client.base.vm->vmm->lpg_shift;
+	max_size = INT_MAX & ~((1 << lpg_shift) - 1);
+
+	if (size <= 0 || size > max_size) {
+		nv_warn(drm, "skipped size %x\n", (u32)size);
+		return -EINVAL;
+	}
 
 	if (sg)
 		type = ttm_bo_type_sg;
@@ -340,13 +352,15 @@
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
 	struct ttm_buffer_object *bo = &nvbo->bo;
-	int ret;
+	int ret, ref;
 
 	ret = ttm_bo_reserve(bo, false, false, false, 0);
 	if (ret)
 		return ret;
 
-	if (--nvbo->pin_refcnt)
+	ref = --nvbo->pin_refcnt;
+	WARN_ON_ONCE(ref < 0);
+	if (ref)
 		goto out;
 
 	nouveau_bo_placement_set(nvbo, bo->mem.placement, 0);
@@ -578,7 +592,7 @@
 	int ret = RING_SPACE(chan, 2);
 	if (ret == 0) {
 		BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1);
-		OUT_RING  (chan, handle);
+		OUT_RING  (chan, handle & 0x0000ffff);
 		FIRE_RING (chan);
 	}
 	return ret;
@@ -973,7 +987,7 @@
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int ret;
 
-	mutex_lock(&chan->cli->mutex);
+	mutex_lock_nested(&chan->cli->mutex, SINGLE_DEPTH_NESTING);
 
 	/* create temporary vmas for the transfer and attach them to the
 	 * old nouveau_mem node, these will get cleaned up after ttm has
@@ -1014,7 +1028,7 @@
 			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
-		{  "COPY", 0, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
 		{  "GRCE", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{ "COPY1", 5, 0x90b8, nvc0_bo_move_copy, nvc0_bo_move_init },
 		{ "COPY0", 4, 0x90b5, nvc0_bo_move_copy, nvc0_bo_move_init },
@@ -1034,7 +1048,7 @@
 		struct nouveau_channel *chan;
 		u32 handle = (mthd->engine << 16) | mthd->oclass;
 
-		if (mthd->init == nve0_bo_move_init)
+		if (mthd->engine)
 			chan = drm->cechan;
 		else
 			chan = drm->channel;
@@ -1251,7 +1265,9 @@
 static int
 nouveau_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	return 0;
+	struct nouveau_bo *nvbo = nouveau_bo(bo);
+
+	return drm_vma_node_verify_access(&nvbo->gem->vma_node, filp);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 4da776f..c5b36f9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -26,6 +26,8 @@
 
 #include <acpi/button.h>
 
+#include <linux/pm_runtime.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_crtc_helper.h>
@@ -240,6 +242,8 @@
 	struct nouveau_encoder *nv_partner;
 	struct nouveau_i2c_port *i2c;
 	int type;
+	int ret;
+	enum drm_connector_status conn_status = connector_status_disconnected;
 
 	/* Cleanup the previous EDID block. */
 	if (nv_connector->edid) {
@@ -248,6 +252,10 @@
 		nv_connector->edid = NULL;
 	}
 
+	ret = pm_runtime_get_sync(connector->dev->dev);
+	if (ret < 0)
+		return conn_status;
+
 	i2c = nouveau_connector_ddc_detect(connector, &nv_encoder);
 	if (i2c) {
 		nv_connector->edid = drm_get_edid(connector, &i2c->adapter);
@@ -263,7 +271,8 @@
 		    !nouveau_dp_detect(to_drm_encoder(nv_encoder))) {
 			NV_ERROR(drm, "Detected %s, but failed init\n",
 				 drm_get_connector_name(connector));
-			return connector_status_disconnected;
+			conn_status = connector_status_disconnected;
+			goto out;
 		}
 
 		/* Override encoder type for DVI-I based on whether EDID
@@ -290,13 +299,15 @@
 		}
 
 		nouveau_connector_set_encoder(connector, nv_encoder);
-		return connector_status_connected;
+		conn_status = connector_status_connected;
+		goto out;
 	}
 
 	nv_encoder = nouveau_connector_of_detect(connector);
 	if (nv_encoder) {
 		nouveau_connector_set_encoder(connector, nv_encoder);
-		return connector_status_connected;
+		conn_status = connector_status_connected;
+		goto out;
 	}
 
 detect_analog:
@@ -311,12 +322,18 @@
 		if (helper->detect(encoder, connector) ==
 						connector_status_connected) {
 			nouveau_connector_set_encoder(connector, nv_encoder);
-			return connector_status_connected;
+			conn_status = connector_status_connected;
+			goto out;
 		}
 
 	}
 
-	return connector_status_disconnected;
+ out:
+
+	pm_runtime_mark_last_busy(connector->dev->dev);
+	pm_runtime_put_autosuspend(connector->dev->dev);
+
+	return conn_status;
 }
 
 static enum drm_connector_status
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 708b2d1..77ffded 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -138,7 +138,7 @@
 {
 	struct nouveau_framebuffer *nouveau_fb;
 	struct drm_gem_object *gem;
-	int ret;
+	int ret = -ENOMEM;
 
 	gem = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
 	if (!gem)
@@ -146,15 +146,19 @@
 
 	nouveau_fb = kzalloc(sizeof(struct nouveau_framebuffer), GFP_KERNEL);
 	if (!nouveau_fb)
-		return ERR_PTR(-ENOMEM);
+		goto err_unref;
 
 	ret = nouveau_framebuffer_init(dev, nouveau_fb, mode_cmd, nouveau_gem_object(gem));
-	if (ret) {
-		drm_gem_object_unreference(gem);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err;
 
 	return &nouveau_fb->base;
+
+err:
+	kfree(nouveau_fb);
+err_unref:
+	drm_gem_object_unreference(gem);
+	return ERR_PTR(ret);
 }
 
 static const struct drm_mode_config_funcs nouveau_mode_config_funcs = {
@@ -390,7 +394,7 @@
 
 	nouveau_display_fini(dev);
 
-	NV_INFO(drm, "unpinning framebuffer(s)...\n");
+	NV_SUSPEND(drm, "unpinning framebuffer(s)...\n");
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct nouveau_framebuffer *nouveau_fb;
 
@@ -412,7 +416,7 @@
 }
 
 void
-nouveau_display_resume(struct drm_device *dev)
+nouveau_display_repin(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct drm_crtc *crtc;
@@ -437,10 +441,12 @@
 		if (ret)
 			NV_ERROR(drm, "Could not pin/map cursor.\n");
 	}
+}
 
-	nouveau_fbcon_set_suspend(dev, 0);
-	nouveau_fbcon_zfill_all(dev);
-
+void
+nouveau_display_resume(struct drm_device *dev)
+{
+	struct drm_crtc *crtc;
 	nouveau_display_init(dev);
 
 	/* Force CLUT to get re-loaded during modeset */
@@ -515,7 +521,8 @@
 
 int
 nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-		       struct drm_pending_vblank_event *event)
+		       struct drm_pending_vblank_event *event,
+		       uint32_t page_flip_flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
@@ -524,9 +531,12 @@
 	struct nouveau_page_flip_state *s;
 	struct nouveau_channel *chan = NULL;
 	struct nouveau_fence *fence;
-	struct list_head res;
-	struct ttm_validate_buffer res_val[2];
+	struct ttm_validate_buffer resv[2] = {
+		{ .bo = &old_bo->bo },
+		{ .bo = &new_bo->bo },
+	};
 	struct ww_acquire_ctx ticket;
+	LIST_HEAD(res);
 	int ret;
 
 	if (!drm->channel)
@@ -545,27 +555,19 @@
 		chan = drm->channel;
 	spin_unlock(&old_bo->bo.bdev->fence_lock);
 
-	mutex_lock(&chan->cli->mutex);
-
 	if (new_bo != old_bo) {
 		ret = nouveau_bo_pin(new_bo, TTM_PL_FLAG_VRAM);
-		if (likely(!ret)) {
-			res_val[0].bo = &old_bo->bo;
-			res_val[1].bo = &new_bo->bo;
-			INIT_LIST_HEAD(&res);
-			list_add_tail(&res_val[0].head, &res);
-			list_add_tail(&res_val[1].head, &res);
-			ret = ttm_eu_reserve_buffers(&ticket, &res);
-			if (ret)
-				nouveau_bo_unpin(new_bo);
-		}
-	} else
-		ret = ttm_bo_reserve(&new_bo->bo, false, false, false, 0);
+		if (ret)
+			goto fail_free;
 
-	if (ret) {
-		mutex_unlock(&chan->cli->mutex);
-		goto fail_free;
+		list_add(&resv[1].head, &res);
 	}
+	list_add(&resv[0].head, &res);
+
+	mutex_lock(&chan->cli->mutex);
+	ret = ttm_eu_reserve_buffers(&ticket, &res);
+	if (ret)
+		goto fail_unpin;
 
 	/* Initialize a page flip struct */
 	*s = (struct nouveau_page_flip_state)
@@ -576,10 +578,11 @@
 	/* Emit a page flip */
 	if (nv_device(drm->device)->card_type >= NV_50) {
 		ret = nv50_display_flip_next(crtc, fb, chan, 0);
-		if (ret) {
-			mutex_unlock(&chan->cli->mutex);
+		if (ret)
 			goto fail_unreserve;
-		}
+	} else {
+		struct nv04_display *dispnv04 = nv04_display(dev);
+		nouveau_bo_ref(new_bo, &dispnv04->image[nouveau_crtc(crtc)->index]);
 	}
 
 	ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence);
@@ -590,22 +593,18 @@
 	/* Update the crtc struct and cleanup */
 	crtc->fb = fb;
 
-	if (old_bo != new_bo) {
-		ttm_eu_fence_buffer_objects(&ticket, &res, fence);
+	ttm_eu_fence_buffer_objects(&ticket, &res, fence);
+	if (old_bo != new_bo)
 		nouveau_bo_unpin(old_bo);
-	} else {
-		nouveau_bo_fence(new_bo, fence);
-		ttm_bo_unreserve(&new_bo->bo);
-	}
 	nouveau_fence_unref(&fence);
 	return 0;
 
 fail_unreserve:
-	if (old_bo != new_bo) {
-		ttm_eu_backoff_reservation(&ticket, &res);
+	ttm_eu_backoff_reservation(&ticket, &res);
+fail_unpin:
+	mutex_unlock(&chan->cli->mutex);
+	if (old_bo != new_bo)
 		nouveau_bo_unpin(new_bo);
-	} else
-		ttm_bo_unreserve(&new_bo->bo);
 fail_free:
 	kfree(s);
 	return ret;
@@ -681,13 +680,6 @@
 }
 
 int
-nouveau_display_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
-			     uint32_t handle)
-{
-	return drm_gem_handle_delete(file_priv, handle);
-}
-
-int
 nouveau_display_dumb_map_offset(struct drm_file *file_priv,
 				struct drm_device *dev,
 				uint32_t handle, uint64_t *poffset)
@@ -697,7 +689,7 @@
 	gem = drm_gem_object_lookup(dev, file_priv, handle);
 	if (gem) {
 		struct nouveau_bo *bo = gem->driver_private;
-		*poffset = bo->bo.addr_space_offset;
+		*poffset = drm_vma_node_offset_addr(&bo->bo.vma_node);
 		drm_gem_object_unreference_unlocked(gem);
 		return 0;
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h
index 1ea3e47..025c66f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.h
+++ b/drivers/gpu/drm/nouveau/nouveau_display.h
@@ -57,10 +57,12 @@
 int  nouveau_display_init(struct drm_device *dev);
 void nouveau_display_fini(struct drm_device *dev);
 int  nouveau_display_suspend(struct drm_device *dev);
+void nouveau_display_repin(struct drm_device *dev);
 void nouveau_display_resume(struct drm_device *dev);
 
 int  nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-			    struct drm_pending_vblank_event *event);
+			    struct drm_pending_vblank_event *event,
+			    uint32_t page_flip_flags);
 int  nouveau_finish_page_flip(struct nouveau_channel *,
 			      struct nouveau_page_flip_state *);
 
@@ -68,11 +70,10 @@
 				 struct drm_mode_create_dumb *args);
 int  nouveau_display_dumb_map_offset(struct drm_file *, struct drm_device *,
 				     u32 handle, u64 *offset);
-int  nouveau_display_dumb_destroy(struct drm_file *, struct drm_device *,
-				  u32 handle);
 
 void nouveau_hdmi_mode_set(struct drm_encoder *, struct drm_display_mode *);
 
+int nouveau_crtc_set_config(struct drm_mode_set *set);
 #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
 extern int nouveau_backlight_init(struct drm_device *);
 extern void nouveau_backlight_exit(struct drm_device *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 218a4b5..8863644 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -25,7 +25,10 @@
 #include <linux/console.h>
 #include <linux/module.h>
 #include <linux/pci.h>
-
+#include <linux/pm_runtime.h>
+#include <linux/vga_switcheroo.h>
+#include "drmP.h"
+#include "drm_crtc_helper.h"
 #include <core/device.h>
 #include <core/client.h>
 #include <core/gpuobj.h>
@@ -69,6 +72,10 @@
 int nouveau_modeset = -1;
 module_param_named(modeset, nouveau_modeset, int, 0400);
 
+MODULE_PARM_DESC(runpm, "disable (0), force enable (1), optimus only default (-1)");
+int nouveau_runtime_pm = -1;
+module_param_named(runpm, nouveau_runtime_pm, int, 0400);
+
 static struct drm_driver driver;
 
 static int
@@ -192,6 +199,18 @@
 
 		arg0 = NVE0_CHANNEL_IND_ENGINE_GR;
 		arg1 = 1;
+	} else
+	if (device->chipset >= 0xa3 &&
+	    device->chipset != 0xaa &&
+	    device->chipset != 0xac) {
+		ret = nouveau_channel_new(drm, &drm->client, NVDRM_DEVICE,
+					  NVDRM_CHAN + 1, NvDmaFB, NvDmaTT,
+					  &drm->cechan);
+		if (ret)
+			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
+
+		arg0 = NvDmaFB;
+		arg1 = NvDmaTT;
 	} else {
 		arg0 = NvDmaFB;
 		arg1 = NvDmaTT;
@@ -284,7 +303,30 @@
 	return 0;
 }
 
-static struct lock_class_key drm_client_lock_class_key;
+#define PCI_CLASS_MULTIMEDIA_HD_AUDIO 0x0403
+
+static void
+nouveau_get_hdmi_dev(struct drm_device *dev)
+{
+	struct nouveau_drm *drm = dev->dev_private;
+	struct pci_dev *pdev = dev->pdev;
+
+	/* subfunction one is a hdmi audio device? */
+	drm->hdmi_device = pci_get_bus_and_slot((unsigned int)pdev->bus->number,
+						PCI_DEVFN(PCI_SLOT(pdev->devfn), 1));
+
+	if (!drm->hdmi_device) {
+		DRM_INFO("hdmi device  not found %d %d %d\n", pdev->bus->number, PCI_SLOT(pdev->devfn), 1);
+		return;
+	}
+
+	if ((drm->hdmi_device->class >> 8) != PCI_CLASS_MULTIMEDIA_HD_AUDIO) {
+		DRM_INFO("possible hdmi device  not audio %d\n", drm->hdmi_device->class);
+		pci_dev_put(drm->hdmi_device);
+		drm->hdmi_device = NULL;
+		return;
+	}
+}
 
 static int
 nouveau_drm_load(struct drm_device *dev, unsigned long flags)
@@ -297,7 +339,6 @@
 	ret = nouveau_cli_create(pdev, "DRM", sizeof(*drm), (void**)&drm);
 	if (ret)
 		return ret;
-	lockdep_set_class(&drm->client.mutex, &drm_client_lock_class_key);
 
 	dev->dev_private = drm;
 	drm->dev = dev;
@@ -305,6 +346,8 @@
 	INIT_LIST_HEAD(&drm->clients);
 	spin_lock_init(&drm->tile.lock);
 
+	nouveau_get_hdmi_dev(dev);
+
 	/* make sure AGP controller is in a consistent state before we
 	 * (possibly) execute vbios init tables (see nouveau_agp.h)
 	 */
@@ -379,6 +422,15 @@
 
 	nouveau_accel_init(drm);
 	nouveau_fbcon_init(dev);
+
+	if (nouveau_runtime_pm != 0) {
+		pm_runtime_use_autosuspend(dev->dev);
+		pm_runtime_set_autosuspend_delay(dev->dev, 5000);
+		pm_runtime_set_active(dev->dev);
+		pm_runtime_allow(dev->dev);
+		pm_runtime_mark_last_busy(dev->dev);
+		pm_runtime_put(dev->dev);
+	}
 	return 0;
 
 fail_dispinit:
@@ -400,6 +452,7 @@
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
+	pm_runtime_get_sync(dev->dev);
 	nouveau_fbcon_fini(dev);
 	nouveau_accel_fini(drm);
 
@@ -415,6 +468,8 @@
 	nouveau_agp_fini(drm);
 	nouveau_vga_fini(drm);
 
+	if (drm->hdmi_device)
+		pci_dev_put(drm->hdmi_device);
 	nouveau_cli_destroy(&drm->client);
 	return 0;
 }
@@ -441,19 +496,16 @@
 	int ret;
 
 	if (dev->mode_config.num_crtc) {
-		NV_INFO(drm, "suspending fbcon...\n");
-		nouveau_fbcon_set_suspend(dev, 1);
-
-		NV_INFO(drm, "suspending display...\n");
+		NV_SUSPEND(drm, "suspending display...\n");
 		ret = nouveau_display_suspend(dev);
 		if (ret)
 			return ret;
 	}
 
-	NV_INFO(drm, "evicting buffers...\n");
+	NV_SUSPEND(drm, "evicting buffers...\n");
 	ttm_bo_evict_mm(&drm->ttm.bdev, TTM_PL_VRAM);
 
-	NV_INFO(drm, "waiting for kernel channels to go idle...\n");
+	NV_SUSPEND(drm, "waiting for kernel channels to go idle...\n");
 	if (drm->cechan) {
 		ret = nouveau_channel_idle(drm->cechan);
 		if (ret)
@@ -466,7 +518,7 @@
 			return ret;
 	}
 
-	NV_INFO(drm, "suspending client object trees...\n");
+	NV_SUSPEND(drm, "suspending client object trees...\n");
 	if (drm->fence && nouveau_fence(drm)->suspend) {
 		if (!nouveau_fence(drm)->suspend(drm))
 			return -ENOMEM;
@@ -478,7 +530,7 @@
 			goto fail_client;
 	}
 
-	NV_INFO(drm, "suspending kernel object tree...\n");
+	NV_SUSPEND(drm, "suspending kernel object tree...\n");
 	ret = nouveau_client_fini(&drm->client.base, true);
 	if (ret)
 		goto fail_client;
@@ -492,7 +544,7 @@
 	}
 
 	if (dev->mode_config.num_crtc) {
-		NV_INFO(drm, "resuming display...\n");
+		NV_SUSPEND(drm, "resuming display...\n");
 		nouveau_display_resume(dev);
 	}
 	return ret;
@@ -504,9 +556,14 @@
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	int ret;
 
-	if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+	if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
+	    drm_dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
 		return 0;
 
+	if (drm_dev->mode_config.num_crtc)
+		nouveau_fbcon_set_suspend(drm_dev, 1);
+
+	nv_suspend_set_printk_level(NV_DBG_INFO);
 	ret = nouveau_do_suspend(drm_dev);
 	if (ret)
 		return ret;
@@ -514,6 +571,7 @@
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_set_power_state(pdev, PCI_D3hot);
+	nv_suspend_set_printk_level(NV_DBG_DEBUG);
 
 	return 0;
 }
@@ -524,15 +582,15 @@
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_cli *cli;
 
-	NV_INFO(drm, "re-enabling device...\n");
+	NV_SUSPEND(drm, "re-enabling device...\n");
 
 	nouveau_agp_reset(drm);
 
-	NV_INFO(drm, "resuming kernel object tree...\n");
+	NV_SUSPEND(drm, "resuming kernel object tree...\n");
 	nouveau_client_init(&drm->client.base);
 	nouveau_agp_init(drm);
 
-	NV_INFO(drm, "resuming client object trees...\n");
+	NV_SUSPEND(drm, "resuming client object trees...\n");
 	if (drm->fence && nouveau_fence(drm)->resume)
 		nouveau_fence(drm)->resume(drm);
 
@@ -544,9 +602,10 @@
 	nouveau_pm_resume(dev);
 
 	if (dev->mode_config.num_crtc) {
-		NV_INFO(drm, "resuming display...\n");
-		nouveau_display_resume(dev);
+		NV_SUSPEND(drm, "resuming display...\n");
+		nouveau_display_repin(dev);
 	}
+
 	return 0;
 }
 
@@ -556,7 +615,8 @@
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	int ret;
 
-	if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF)
+	if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF ||
+	    drm_dev->switch_power_state == DRM_SWITCH_POWER_DYNAMIC_OFF)
 		return 0;
 
 	pci_set_power_state(pdev, PCI_D0);
@@ -566,23 +626,54 @@
 		return ret;
 	pci_set_master(pdev);
 
-	return nouveau_do_resume(drm_dev);
+	nv_suspend_set_printk_level(NV_DBG_INFO);
+	ret = nouveau_do_resume(drm_dev);
+	if (ret) {
+		nv_suspend_set_printk_level(NV_DBG_DEBUG);
+		return ret;
+	}
+	if (drm_dev->mode_config.num_crtc)
+		nouveau_fbcon_set_suspend(drm_dev, 0);
+
+	nouveau_fbcon_zfill_all(drm_dev);
+	nouveau_display_resume(drm_dev);
+	nv_suspend_set_printk_level(NV_DBG_DEBUG);
+	return 0;
 }
 
 static int nouveau_pmops_freeze(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	int ret;
 
-	return nouveau_do_suspend(drm_dev);
+	nv_suspend_set_printk_level(NV_DBG_INFO);
+	if (drm_dev->mode_config.num_crtc)
+		nouveau_fbcon_set_suspend(drm_dev, 1);
+
+	ret = nouveau_do_suspend(drm_dev);
+	nv_suspend_set_printk_level(NV_DBG_DEBUG);
+	return ret;
 }
 
 static int nouveau_pmops_thaw(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	int ret;
 
-	return nouveau_do_resume(drm_dev);
+	nv_suspend_set_printk_level(NV_DBG_INFO);
+	ret = nouveau_do_resume(drm_dev);
+	if (ret) {
+		nv_suspend_set_printk_level(NV_DBG_DEBUG);
+		return ret;
+	}
+	if (drm_dev->mode_config.num_crtc)
+		nouveau_fbcon_set_suspend(drm_dev, 0);
+	nouveau_fbcon_zfill_all(drm_dev);
+	nouveau_display_resume(drm_dev);
+	nv_suspend_set_printk_level(NV_DBG_DEBUG);
+	return 0;
 }
 
 
@@ -595,19 +686,24 @@
 	char name[32], tmpname[TASK_COMM_LEN];
 	int ret;
 
+	/* need to bring up power immediately if opening device */
+	ret = pm_runtime_get_sync(dev->dev);
+	if (ret < 0)
+		return ret;
+
 	get_task_comm(tmpname, current);
 	snprintf(name, sizeof(name), "%s[%d]", tmpname, pid_nr(fpriv->pid));
 
 	ret = nouveau_cli_create(pdev, name, sizeof(*cli), (void **)&cli);
 	if (ret)
-		return ret;
+		goto out_suspend;
 
 	if (nv_device(drm->device)->card_type >= NV_50) {
 		ret = nouveau_vm_new(nv_device(drm->device), 0, (1ULL << 40),
 				     0x1000, &cli->base.vm);
 		if (ret) {
 			nouveau_cli_destroy(cli);
-			return ret;
+			goto out_suspend;
 		}
 	}
 
@@ -616,7 +712,12 @@
 	mutex_lock(&drm->client.mutex);
 	list_add(&cli->head, &drm->clients);
 	mutex_unlock(&drm->client.mutex);
-	return 0;
+
+out_suspend:
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+
+	return ret;
 }
 
 static void
@@ -625,12 +726,15 @@
 	struct nouveau_cli *cli = nouveau_cli(fpriv);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
+	pm_runtime_get_sync(dev->dev);
+
 	if (cli->abi16)
 		nouveau_abi16_fini(cli->abi16);
 
 	mutex_lock(&drm->client.mutex);
 	list_del(&cli->head);
 	mutex_unlock(&drm->client.mutex);
+
 }
 
 static void
@@ -638,33 +742,52 @@
 {
 	struct nouveau_cli *cli = nouveau_cli(fpriv);
 	nouveau_cli_destroy(cli);
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
 }
 
-static struct drm_ioctl_desc
+static const struct drm_ioctl_desc
 nouveau_ioctls[] = {
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GETPARAM, nouveau_abi16_ioctl_getparam, DRM_UNLOCKED|DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GETPARAM, nouveau_abi16_ioctl_getparam, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_SETPARAM, nouveau_abi16_ioctl_setparam, DRM_UNLOCKED|DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_ALLOC, nouveau_abi16_ioctl_channel_alloc, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_FREE, nouveau_abi16_ioctl_channel_free, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_UNLOCKED|DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_UNLOCKED|DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_ALLOC, nouveau_abi16_ioctl_channel_alloc, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_FREE, nouveau_abi16_ioctl_channel_free, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_UNLOCKED|DRM_AUTH|DRM_RENDER_ALLOW),
 };
 
+long nouveau_drm_ioctl(struct file *filp,
+		       unsigned int cmd, unsigned long arg)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct drm_device *dev;
+	long ret;
+	dev = file_priv->minor->dev;
+
+	ret = pm_runtime_get_sync(dev->dev);
+	if (ret < 0)
+		return ret;
+
+	ret = drm_ioctl(filp, cmd, arg);
+
+	pm_runtime_mark_last_busy(dev->dev);
+	pm_runtime_put_autosuspend(dev->dev);
+	return ret;
+}
 static const struct file_operations
 nouveau_driver_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
 	.release = drm_release,
-	.unlocked_ioctl = drm_ioctl,
+	.unlocked_ioctl = nouveau_drm_ioctl,
 	.mmap = nouveau_ttm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 	.read = drm_read,
 #if defined(CONFIG_COMPAT)
 	.compat_ioctl = nouveau_compat_ioctl,
@@ -675,8 +798,8 @@
 static struct drm_driver
 driver = {
 	.driver_features =
-		DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_SG |
-		DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME,
+		DRIVER_USE_AGP |
+		DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_RENDER,
 
 	.load = nouveau_drm_load,
 	.unload = nouveau_drm_unload,
@@ -695,6 +818,7 @@
 	.disable_vblank = nouveau_drm_vblank_disable,
 
 	.ioctls = nouveau_ioctls,
+	.num_ioctls = ARRAY_SIZE(nouveau_ioctls),
 	.fops = &nouveau_driver_fops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
@@ -715,7 +839,7 @@
 
 	.dumb_create = nouveau_display_dumb_create,
 	.dumb_map_offset = nouveau_display_dumb_map_offset,
-	.dumb_destroy = nouveau_display_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
@@ -744,6 +868,90 @@
 	{}
 };
 
+static int nouveau_pmops_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	int ret;
+
+	if (nouveau_runtime_pm == 0)
+		return -EINVAL;
+
+	drm_kms_helper_poll_disable(drm_dev);
+	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_OFF);
+	nouveau_switcheroo_optimus_dsm();
+	ret = nouveau_do_suspend(drm_dev);
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, PCI_D3cold);
+	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
+	return ret;
+}
+
+static int nouveau_pmops_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	struct nouveau_device *device = nouveau_dev(drm_dev);
+	int ret;
+
+	if (nouveau_runtime_pm == 0)
+		return -EINVAL;
+
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	ret = pci_enable_device(pdev);
+	if (ret)
+		return ret;
+	pci_set_master(pdev);
+
+	ret = nouveau_do_resume(drm_dev);
+	nouveau_display_resume(drm_dev);
+	drm_kms_helper_poll_enable(drm_dev);
+	/* do magic */
+	nv_mask(device, 0x88488, (1 << 25), (1 << 25));
+	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
+	drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
+	return ret;
+}
+
+static int nouveau_pmops_runtime_idle(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct drm_device *drm_dev = pci_get_drvdata(pdev);
+	struct nouveau_drm *drm = nouveau_drm(drm_dev);
+	struct drm_crtc *crtc;
+
+	if (nouveau_runtime_pm == 0)
+		return -EBUSY;
+
+	/* are we optimus enabled? */
+	if (nouveau_runtime_pm == -1 && !nouveau_is_optimus() && !nouveau_is_v1_dsm()) {
+		DRM_DEBUG_DRIVER("failing to power off - not optimus\n");
+		return -EBUSY;
+	}
+
+	/* if we have a hdmi audio device - make sure it has a driver loaded */
+	if (drm->hdmi_device) {
+		if (!drm->hdmi_device->driver) {
+			DRM_DEBUG_DRIVER("failing to power off - no HDMI audio driver loaded\n");
+			pm_runtime_mark_last_busy(dev);
+			return -EBUSY;
+		}
+	}
+
+	list_for_each_entry(crtc, &drm->dev->mode_config.crtc_list, head) {
+		if (crtc->enabled) {
+			DRM_DEBUG_DRIVER("failing to power off - crtc active\n");
+			return -EBUSY;
+		}
+	}
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_autosuspend(dev);
+	/* we don't want the main rpm_idle to call suspend - we want to autosuspend */
+	return 1;
+}
+
 static const struct dev_pm_ops nouveau_pm_ops = {
 	.suspend = nouveau_pmops_suspend,
 	.resume = nouveau_pmops_resume,
@@ -751,6 +959,9 @@
 	.thaw = nouveau_pmops_thaw,
 	.poweroff = nouveau_pmops_freeze,
 	.restore = nouveau_pmops_resume,
+	.runtime_suspend = nouveau_pmops_runtime_suspend,
+	.runtime_resume = nouveau_pmops_runtime_resume,
+	.runtime_idle = nouveau_pmops_runtime_idle,
 };
 
 static struct pci_driver
@@ -765,8 +976,6 @@
 static int __init
 nouveau_drm_init(void)
 {
-	driver.num_ioctls = ARRAY_SIZE(nouveau_ioctls);
-
 	if (nouveau_modeset == -1) {
 #ifdef CONFIG_VGA_CONSOLE
 		if (vgacon_text_force())
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index 41ff7e0..994fd6e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -70,6 +70,8 @@
 	return fpriv ? fpriv->driver_priv : NULL;
 }
 
+extern int nouveau_runtime_pm;
+
 struct nouveau_drm {
 	struct nouveau_cli client;
 	struct drm_device *dev;
@@ -129,6 +131,12 @@
 
 	/* power management */
 	struct nouveau_pm *pm;
+
+	/* display power reference */
+	bool have_disp_power_ref;
+
+	struct dev_pm_domain vga_pm_domain;
+	struct pci_dev *hdmi_device;
 };
 
 static inline struct nouveau_drm *
@@ -146,6 +154,7 @@
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
 
+#define NV_SUSPEND(cli, fmt, args...) nv_suspend((cli), fmt, ##args)
 #define NV_FATAL(cli, fmt, args...) nv_fatal((cli), fmt, ##args)
 #define NV_ERROR(cli, fmt, args...) nv_error((cli), fmt, ##args)
 #define NV_WARN(cli, fmt, args...) nv_warn((cli), fmt, ##args)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 9352010..8f6d63d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -385,6 +385,7 @@
 	mutex_unlock(&dev->struct_mutex);
 	if (chan)
 		nouveau_bo_vma_del(nvbo, &fbcon->nouveau_fb.vma);
+	nouveau_bo_unmap(nvbo);
 out_unpin:
 	nouveau_bo_unpin(nvbo);
 out_unref:
@@ -397,7 +398,8 @@
 nouveau_fbcon_output_poll_changed(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	drm_fb_helper_hotplug_event(&drm->fbcon->helper);
+	if (drm->fbcon)
+		drm_fb_helper_hotplug_event(&drm->fbcon->helper);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 1680d91..be31499 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -143,7 +143,7 @@
 	int ret;
 
 	fence->channel  = chan;
-	fence->timeout  = jiffies + (3 * DRM_HZ);
+	fence->timeout  = jiffies + (15 * DRM_HZ);
 	fence->sequence = ++fctx->sequence;
 
 	ret = fctx->emit(fence);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index e72d09c..487242f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -50,12 +50,6 @@
 		return;
 	nvbo->gem = NULL;
 
-	/* Lockdep hates you for doing reserve with gem object lock held */
-	if (WARN_ON_ONCE(nvbo->pin_refcnt)) {
-		nvbo->pin_refcnt = 1;
-		nouveau_bo_unpin(nvbo);
-	}
-
 	if (gem->import_attach)
 		drm_prime_gem_destroy(gem, nvbo->bo.sg);
 
@@ -226,7 +220,7 @@
 	}
 
 	rep->size = nvbo->bo.mem.num_pages << PAGE_SHIFT;
-	rep->map_handle = nvbo->bo.addr_space_offset;
+	rep->map_handle = drm_vma_node_offset_addr(&nvbo->bo.vma_node);
 	rep->tile_mode = nvbo->tile_mode;
 	rep->tile_flags = nvbo->tile_flags;
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ioc32.c b/drivers/gpu/drm/nouveau/nouveau_ioc32.c
index 08214bc..c1a7e5a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ioc32.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ioc32.c
@@ -63,7 +63,7 @@
 	if (fn != NULL)
 		ret = (*fn)(filp, cmd, arg);
 	else
-		ret = drm_ioctl(filp, cmd, arg);
+		ret = nouveau_drm_ioctl(filp, cmd, arg);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_ioctl.h b/drivers/gpu/drm/nouveau/nouveau_ioctl.h
index ef2b290..3b9f2e5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ioctl.h
+++ b/drivers/gpu/drm/nouveau/nouveau_ioctl.h
@@ -2,5 +2,6 @@
 #define __NOUVEAU_IOCTL_H__
 
 long nouveau_compat_ioctl(struct file *, unsigned int cmd, unsigned long arg);
+long nouveau_drm_ioctl(struct file *, unsigned int cmd, unsigned long arg);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index 25d3495..81638d7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -32,6 +32,9 @@
 {
 	struct drm_device *dev = pci_get_drvdata(pdev);
 
+	if ((nouveau_is_optimus() || nouveau_is_v1_dsm()) && state == VGA_SWITCHEROO_OFF)
+		return;
+
 	if (state == VGA_SWITCHEROO_ON) {
 		printk(KERN_ERR "VGA switcheroo: switched nouveau on\n");
 		dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
@@ -78,8 +81,17 @@
 nouveau_vga_init(struct nouveau_drm *drm)
 {
 	struct drm_device *dev = drm->dev;
+	bool runtime = false;
 	vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode);
-	vga_switcheroo_register_client(dev->pdev, &nouveau_switcheroo_ops);
+
+	if (nouveau_runtime_pm == 1)
+		runtime = true;
+	if ((nouveau_runtime_pm == -1) && (nouveau_is_optimus() || nouveau_is_v1_dsm()))
+		runtime = true;
+	vga_switcheroo_register_client(dev->pdev, &nouveau_switcheroo_ops, runtime);
+
+	if (runtime && nouveau_is_v1_dsm() && !nouveau_is_optimus())
+		vga_switcheroo_init_domain_pm_ops(drm->dev->dev, &drm->vga_pm_domain);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c
index 8e47a9b..22aa996 100644
--- a/drivers/gpu/drm/nouveau/nv17_fence.c
+++ b/drivers/gpu/drm/nouveau/nv17_fence.c
@@ -76,7 +76,7 @@
 	struct ttm_mem_reg *mem = &priv->bo->bo.mem;
 	struct nouveau_object *object;
 	u32 start = mem->start * PAGE_SIZE;
-	u32 limit = mem->start + mem->size - 1;
+	u32 limit = start + mem->size - 1;
 	int ret = 0;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
diff --git a/drivers/gpu/drm/nouveau/nv40_pm.c b/drivers/gpu/drm/nouveau/nv40_pm.c
index 3af5bcd..625f80d 100644
--- a/drivers/gpu/drm/nouveau/nv40_pm.c
+++ b/drivers/gpu/drm/nouveau/nv40_pm.c
@@ -131,7 +131,7 @@
 	if (clk < pll->vco1.max_freq)
 		pll->vco2.max_freq = 0;
 
-	pclk->pll_calc(pclk, pll, clk, &coef);
+	ret = pclk->pll_calc(pclk, pll, clk, &coef);
 	if (ret == 0)
 		return -ERANGE;
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 54dc635..9d2092a 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -355,6 +355,7 @@
 
 struct nv50_head {
 	struct nouveau_crtc base;
+	struct nouveau_bo *image;
 	struct nv50_curs curs;
 	struct nv50_sync sync;
 	struct nv50_ovly ovly;
@@ -517,9 +518,10 @@
 {
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nv50_head *head = nv50_head(crtc);
 	struct nv50_sync *sync = nv50_sync(crtc);
-	int head = nv_crtc->index, ret;
 	u32 *push;
+	int ret;
 
 	swap_interval <<= 4;
 	if (swap_interval == 0)
@@ -537,7 +539,7 @@
 			return ret;
 
 		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
-		OUT_RING  (chan, NvEvoSema0 + head);
+		OUT_RING  (chan, NvEvoSema0 + nv_crtc->index);
 		OUT_RING  (chan, sync->addr ^ 0x10);
 		BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
 		OUT_RING  (chan, sync->data + 1);
@@ -546,7 +548,7 @@
 		OUT_RING  (chan, sync->data);
 	} else
 	if (chan && nv_mclass(chan->object) < NVC0_CHANNEL_IND_CLASS) {
-		u64 addr = nv84_fence_crtc(chan, head) + sync->addr;
+		u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
 		ret = RING_SPACE(chan, 12);
 		if (ret)
 			return ret;
@@ -565,7 +567,7 @@
 		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
 	} else
 	if (chan) {
-		u64 addr = nv84_fence_crtc(chan, head) + sync->addr;
+		u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
 		ret = RING_SPACE(chan, 10);
 		if (ret)
 			return ret;
@@ -630,6 +632,8 @@
 	evo_mthd(push, 0x0080, 1);
 	evo_data(push, 0x00000000);
 	evo_kick(push, sync);
+
+	nouveau_bo_ref(nv_fb->nvbo, &head->image);
 	return 0;
 }
 
@@ -1038,18 +1042,17 @@
 nv50_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
 {
 	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
+	struct nv50_head *head = nv50_head(crtc);
 	int ret;
 
 	ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
-	if (ret)
-		return ret;
-
-	if (old_fb) {
-		nvfb = nouveau_framebuffer(old_fb);
-		nouveau_bo_unpin(nvfb->nvbo);
+	if (ret == 0) {
+		if (head->image)
+			nouveau_bo_unpin(head->image);
+		nouveau_bo_ref(nvfb->nvbo, &head->image);
 	}
 
-	return 0;
+	return ret;
 }
 
 static int
@@ -1198,6 +1201,15 @@
 	}
 }
 
+static void
+nv50_crtc_disable(struct drm_crtc *crtc)
+{
+	struct nv50_head *head = nv50_head(crtc);
+	if (head->image)
+		nouveau_bo_unpin(head->image);
+	nouveau_bo_ref(NULL, &head->image);
+}
+
 static int
 nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 		     uint32_t handle, uint32_t width, uint32_t height)
@@ -1271,18 +1283,29 @@
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv50_disp *disp = nv50_disp(crtc->dev);
 	struct nv50_head *head = nv50_head(crtc);
+
 	nv50_dmac_destroy(disp->core, &head->ovly.base);
 	nv50_pioc_destroy(disp->core, &head->oimm.base);
 	nv50_dmac_destroy(disp->core, &head->sync.base);
 	nv50_pioc_destroy(disp->core, &head->curs.base);
+
+	/*XXX: this shouldn't be necessary, but the core doesn't call
+	 *     disconnect() during the cleanup paths
+	 */
+	if (head->image)
+		nouveau_bo_unpin(head->image);
+	nouveau_bo_ref(NULL, &head->image);
+
 	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
 	if (nv_crtc->cursor.nvbo)
 		nouveau_bo_unpin(nv_crtc->cursor.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+
 	nouveau_bo_unmap(nv_crtc->lut.nvbo);
 	if (nv_crtc->lut.nvbo)
 		nouveau_bo_unpin(nv_crtc->lut.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
+
 	drm_crtc_cleanup(crtc);
 	kfree(crtc);
 }
@@ -1296,13 +1319,14 @@
 	.mode_set_base = nv50_crtc_mode_set_base,
 	.mode_set_base_atomic = nv50_crtc_mode_set_base_atomic,
 	.load_lut = nv50_crtc_lut_load,
+	.disable = nv50_crtc_disable,
 };
 
 static const struct drm_crtc_funcs nv50_crtc_func = {
 	.cursor_set = nv50_crtc_cursor_set,
 	.cursor_move = nv50_crtc_cursor_move,
 	.gamma_set = nv50_crtc_gamma_set,
-	.set_config = drm_crtc_helper_set_config,
+	.set_config = nouveau_crtc_set_config,
 	.destroy = nv50_crtc_destroy,
 	.page_flip = nouveau_crtc_page_flip,
 };
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index f9701e5..0ee3638 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -39,6 +39,8 @@
 	struct nv10_fence_chan *fctx;
 	struct ttm_mem_reg *mem = &priv->bo->bo.mem;
 	struct nouveau_object *object;
+	u32 start = mem->start * PAGE_SIZE;
+	u32 limit = start + mem->size - 1;
 	int ret, i;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
@@ -51,26 +53,28 @@
 	fctx->base.sync = nv17_fence_sync;
 
 	ret = nouveau_object_new(nv_object(chan->cli), chan->handle,
-				 NvSema, 0x0002,
+				 NvSema, 0x003d,
 				 &(struct nv_dma_class) {
 					.flags = NV_DMA_TARGET_VRAM |
 						 NV_DMA_ACCESS_RDWR,
-					.start = mem->start * PAGE_SIZE,
-					.limit = mem->size - 1,
+					.start = start,
+					.limit = limit,
 				 }, sizeof(struct nv_dma_class),
 				 &object);
 
 	/* dma objects for display sync channel semaphore blocks */
 	for (i = 0; !ret && i < dev->mode_config.num_crtc; i++) {
 		struct nouveau_bo *bo = nv50_display_crtc_sema(dev, i);
+		u32 start = bo->bo.mem.start * PAGE_SIZE;
+		u32 limit = start + bo->bo.mem.size - 1;
 
 		ret = nouveau_object_new(nv_object(chan->cli), chan->handle,
 					 NvEvoSema0 + i, 0x003d,
 					 &(struct nv_dma_class) {
 						.flags = NV_DMA_TARGET_VRAM |
 							 NV_DMA_ACCESS_RDWR,
-						.start = bo->bo.offset,
-						.limit = bo->bo.offset + 0xfff,
+						.start = start,
+						.limit = limit,
 					 }, sizeof(struct nv_dma_class),
 					 &object);
 	}
diff --git a/drivers/gpu/drm/omapdrm/Makefile b/drivers/gpu/drm/omapdrm/Makefile
index d85e058..778372b 100644
--- a/drivers/gpu/drm/omapdrm/Makefile
+++ b/drivers/gpu/drm/omapdrm/Makefile
@@ -18,7 +18,4 @@
 	omap_dmm_tiler.o \
 	tcm-sita.o
 
-# temporary:
-omapdrm-y += omap_gem_helpers.o
-
 obj-$(CONFIG_DRM_OMAP)	+= omapdrm.o
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index 11a5263..0fd2eb1 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -331,7 +331,8 @@
 
 static int omap_crtc_page_flip_locked(struct drm_crtc *crtc,
 		 struct drm_framebuffer *fb,
-		 struct drm_pending_vblank_event *event)
+		 struct drm_pending_vblank_event *event,
+		 uint32_t page_flip_flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct omap_crtc *omap_crtc = to_omap_crtc(crtc);
diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
index 9b794c9..acf6678 100644
--- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
+++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c
@@ -871,7 +871,7 @@
 		goto error;
 
 	for (lut_idx = 0; lut_idx < omap_dmm->num_lut; lut_idx++) {
-		memset(map, 0, sizeof(h_adj * sizeof(*map)));
+		memset(map, 0, h_adj * sizeof(*map));
 		memset(global_map, ' ', (w_adj + 1) * h_adj);
 
 		for (i = 0; i < omap_dmm->container_height; i++) {
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index a3004f1..2603d90 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -419,7 +419,7 @@
 	return ret;
 }
 
-static struct drm_ioctl_desc ioctls[DRM_COMMAND_END - DRM_COMMAND_BASE] = {
+static const struct drm_ioctl_desc ioctls[DRM_COMMAND_END - DRM_COMMAND_BASE] = {
 	DRM_IOCTL_DEF_DRV(OMAP_GET_PARAM, ioctl_get_param, DRM_UNLOCKED|DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(OMAP_SET_PARAM, ioctl_set_param, DRM_UNLOCKED|DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(OMAP_GEM_NEW, ioctl_gem_new, DRM_UNLOCKED|DRM_AUTH),
@@ -524,12 +524,6 @@
 	return 0;
 }
 
-static int dev_firstopen(struct drm_device *dev)
-{
-	DBG("firstopen: dev=%p", dev);
-	return 0;
-}
-
 /**
  * lastclose - clean up after all DRM clients have exited
  * @dev: DRM device
@@ -598,7 +592,6 @@
 		.release = drm_release,
 		.mmap = omap_gem_mmap,
 		.poll = drm_poll,
-		.fasync = drm_fasync,
 		.read = drm_read,
 		.llseek = noop_llseek,
 };
@@ -609,7 +602,6 @@
 		.load = dev_load,
 		.unload = dev_unload,
 		.open = dev_open,
-		.firstopen = dev_firstopen,
 		.lastclose = dev_lastclose,
 		.preclose = dev_preclose,
 		.postclose = dev_postclose,
@@ -633,7 +625,7 @@
 		.gem_vm_ops = &omap_gem_vm_ops,
 		.dumb_create = omap_gem_dumb_create,
 		.dumb_map_offset = omap_gem_dumb_map_offset,
-		.dumb_destroy = omap_gem_dumb_destroy,
+		.dumb_destroy = drm_gem_dumb_destroy,
 		.ioctls = ioctls,
 		.num_ioctls = DRM_OMAP_NUM_IOCTLS,
 		.fops = &omapdriver_fops,
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 14f17da..30b95b7 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -203,9 +203,8 @@
 struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev,
 		struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos);
 struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p);
-int omap_framebuffer_replace(struct drm_framebuffer *a,
-		struct drm_framebuffer *b, void *arg,
-		void (*unpin)(void *arg, struct drm_gem_object *bo));
+int omap_framebuffer_pin(struct drm_framebuffer *fb);
+int omap_framebuffer_unpin(struct drm_framebuffer *fb);
 void omap_framebuffer_update_scanout(struct drm_framebuffer *fb,
 		struct omap_drm_window *win, struct omap_overlay_info *info);
 struct drm_connector *omap_framebuffer_get_next_connector(
@@ -225,8 +224,6 @@
 void *omap_gem_vaddr(struct drm_gem_object *obj);
 int omap_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
 		uint32_t handle, uint64_t *offset);
-int omap_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
-		uint32_t handle);
 int omap_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 		struct drm_mode_create_dumb *args);
 int omap_gem_mmap(struct file *filp, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 8031402..f2b8f06 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -237,58 +237,52 @@
 	}
 }
 
-/* Call for unpin 'a' (if not NULL), and pin 'b' (if not NULL).  Although
- * buffers to unpin are just pushed to the unpin fifo so that the
- * caller can defer unpin until vblank.
- *
- * Note if this fails (ie. something went very wrong!), all buffers are
- * unpinned, and the caller disables the overlay.  We could have tried
- * to revert back to the previous set of pinned buffers but if things are
- * hosed there is no guarantee that would succeed.
- */
-int omap_framebuffer_replace(struct drm_framebuffer *a,
-		struct drm_framebuffer *b, void *arg,
-		void (*unpin)(void *arg, struct drm_gem_object *bo))
+/* pin, prepare for scanout: */
+int omap_framebuffer_pin(struct drm_framebuffer *fb)
 {
-	int ret = 0, i, na, nb;
-	struct omap_framebuffer *ofba = to_omap_framebuffer(a);
-	struct omap_framebuffer *ofbb = to_omap_framebuffer(b);
-	uint32_t pinned_mask = 0;
+	struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb);
+	int ret, i, n = drm_format_num_planes(fb->pixel_format);
 
-	na = a ? drm_format_num_planes(a->pixel_format) : 0;
-	nb = b ? drm_format_num_planes(b->pixel_format) : 0;
-
-	for (i = 0; i < max(na, nb); i++) {
-		struct plane *pa, *pb;
-
-		pa = (i < na) ? &ofba->planes[i] : NULL;
-		pb = (i < nb) ? &ofbb->planes[i] : NULL;
-
-		if (pa)
-			unpin(arg, pa->bo);
-
-		if (pb && !ret) {
-			ret = omap_gem_get_paddr(pb->bo, &pb->paddr, true);
-			if (!ret) {
-				omap_gem_dma_sync(pb->bo, DMA_TO_DEVICE);
-				pinned_mask |= (1 << i);
-			}
-		}
+	for (i = 0; i < n; i++) {
+		struct plane *plane = &omap_fb->planes[i];
+		ret = omap_gem_get_paddr(plane->bo, &plane->paddr, true);
+		if (ret)
+			goto fail;
+		omap_gem_dma_sync(plane->bo, DMA_TO_DEVICE);
 	}
 
-	if (ret) {
-		/* something went wrong.. unpin what has been pinned */
-		for (i = 0; i < nb; i++) {
-			if (pinned_mask & (1 << i)) {
-				struct plane *pb = &ofba->planes[i];
-				unpin(arg, pb->bo);
-			}
-		}
+	return 0;
+
+fail:
+	for (i--; i >= 0; i--) {
+		struct plane *plane = &omap_fb->planes[i];
+		omap_gem_put_paddr(plane->bo);
+		plane->paddr = 0;
 	}
 
 	return ret;
 }
 
+/* unpin, no longer being scanned out: */
+int omap_framebuffer_unpin(struct drm_framebuffer *fb)
+{
+	struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb);
+	int ret, i, n = drm_format_num_planes(fb->pixel_format);
+
+	for (i = 0; i < n; i++) {
+		struct plane *plane = &omap_fb->planes[i];
+		ret = omap_gem_put_paddr(plane->bo);
+		if (ret)
+			goto fail;
+		plane->paddr = 0;
+	}
+
+	return 0;
+
+fail:
+	return ret;
+}
+
 struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p)
 {
 	struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb);
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index ebbdf41..533f6eb 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -20,6 +20,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/shmem_fs.h>
+#include <drm/drm_vma_manager.h>
 
 #include "omap_drv.h"
 #include "omap_dmm_tiler.h"
@@ -236,7 +237,7 @@
 	 * mapping_gfp_mask(mapping) which conflicts w/ GFP_DMA32.. probably
 	 * we actually want CMA memory for it all anyways..
 	 */
-	pages = _drm_gem_get_pages(obj, GFP_KERNEL);
+	pages = drm_gem_get_pages(obj, GFP_KERNEL);
 	if (IS_ERR(pages)) {
 		dev_err(obj->dev->dev, "could not get pages: %ld\n", PTR_ERR(pages));
 		return PTR_ERR(pages);
@@ -270,7 +271,7 @@
 	return 0;
 
 free_pages:
-	_drm_gem_put_pages(obj, pages, true, false);
+	drm_gem_put_pages(obj, pages, true, false);
 
 	return ret;
 }
@@ -294,7 +295,7 @@
 	kfree(omap_obj->addrs);
 	omap_obj->addrs = NULL;
 
-	_drm_gem_put_pages(obj, omap_obj->pages, true, false);
+	drm_gem_put_pages(obj, omap_obj->pages, true, false);
 	omap_obj->pages = NULL;
 }
 
@@ -308,21 +309,20 @@
 static uint64_t mmap_offset(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
+	int ret;
+	size_t size;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	if (!obj->map_list.map) {
-		/* Make it mmapable */
-		size_t size = omap_gem_mmap_size(obj);
-		int ret = _drm_gem_create_mmap_offset_size(obj, size);
-
-		if (ret) {
-			dev_err(dev->dev, "could not allocate mmap offset\n");
-			return 0;
-		}
+	/* Make it mmapable */
+	size = omap_gem_mmap_size(obj);
+	ret = drm_gem_create_mmap_offset_size(obj, size);
+	if (ret) {
+		dev_err(dev->dev, "could not allocate mmap offset\n");
+		return 0;
 	}
 
-	return (uint64_t)obj->map_list.hash.key << PAGE_SHIFT;
+	return drm_vma_node_offset_addr(&obj->vma_node);
 }
 
 uint64_t omap_gem_mmap_offset(struct drm_gem_object *obj)
@@ -629,21 +629,6 @@
 }
 
 /**
- * omap_gem_dumb_destroy	-	destroy a dumb buffer
- * @file: client file
- * @dev: our DRM device
- * @handle: the object handle
- *
- * Destroy a handle that was created via omap_gem_dumb_create.
- */
-int omap_gem_dumb_destroy(struct drm_file *file, struct drm_device *dev,
-		uint32_t handle)
-{
-	/* No special work needed, drop the reference and see what falls out */
-	return drm_gem_handle_delete(file, handle);
-}
-
-/**
  * omap_gem_dumb_map	-	buffer mapping for dumb interface
  * @file: our drm client file
  * @dev: drm device
@@ -997,12 +982,11 @@
 {
 	struct drm_device *dev = obj->dev;
 	struct omap_gem_object *omap_obj = to_omap_bo(obj);
-	uint64_t off = 0;
+	uint64_t off;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	if (obj->map_list.map)
-		off = (uint64_t)obj->map_list.hash.key;
+	off = drm_vma_node_start(&obj->vma_node);
 
 	seq_printf(m, "%08x: %2d (%2d) %08llx %08Zx (%2d) %p %4d",
 			omap_obj->flags, obj->name, obj->refcount.refcount.counter,
@@ -1309,8 +1293,7 @@
 
 	list_del(&omap_obj->mm_list);
 
-	if (obj->map_list.map)
-		drm_gem_free_mmap_offset(obj);
+	drm_gem_free_mmap_offset(obj);
 
 	/* this means the object is still pinned.. which really should
 	 * not happen.  I think..
@@ -1427,8 +1410,9 @@
 		omap_obj->height = gsize.tiled.height;
 	}
 
+	ret = 0;
 	if (flags & (OMAP_BO_DMA|OMAP_BO_EXT_MEM))
-		ret = drm_gem_private_object_init(dev, obj, size);
+		drm_gem_private_object_init(dev, obj, size);
 	else
 		ret = drm_gem_object_init(dev, obj, size);
 
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_helpers.c b/drivers/gpu/drm/omapdrm/omap_gem_helpers.c
deleted file mode 100644
index f9eb679..0000000
--- a/drivers/gpu/drm/omapdrm/omap_gem_helpers.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * drivers/gpu/drm/omapdrm/omap_gem_helpers.c
- *
- * Copyright (C) 2011 Texas Instruments
- * Author: Rob Clark <rob.clark@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published by
- * the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-/* temporary copy of drm_gem_{get,put}_pages() until the
- * "drm/gem: add functions to get/put pages" patch is merged..
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/shmem_fs.h>
-
-#include <drm/drmP.h>
-
-/**
- * drm_gem_get_pages - helper to allocate backing pages for a GEM object
- * @obj: obj in question
- * @gfpmask: gfp mask of requested pages
- */
-struct page **_drm_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask)
-{
-	struct inode *inode;
-	struct address_space *mapping;
-	struct page *p, **pages;
-	int i, npages;
-
-	/* This is the shared memory object that backs the GEM resource */
-	inode = file_inode(obj->filp);
-	mapping = inode->i_mapping;
-
-	npages = obj->size >> PAGE_SHIFT;
-
-	pages = drm_malloc_ab(npages, sizeof(struct page *));
-	if (pages == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	gfpmask |= mapping_gfp_mask(mapping);
-
-	for (i = 0; i < npages; i++) {
-		p = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
-		if (IS_ERR(p))
-			goto fail;
-		pages[i] = p;
-
-		/* There is a hypothetical issue w/ drivers that require
-		 * buffer memory in the low 4GB.. if the pages are un-
-		 * pinned, and swapped out, they can end up swapped back
-		 * in above 4GB.  If pages are already in memory, then
-		 * shmem_read_mapping_page_gfp will ignore the gfpmask,
-		 * even if the already in-memory page disobeys the mask.
-		 *
-		 * It is only a theoretical issue today, because none of
-		 * the devices with this limitation can be populated with
-		 * enough memory to trigger the issue.  But this BUG_ON()
-		 * is here as a reminder in case the problem with
-		 * shmem_read_mapping_page_gfp() isn't solved by the time
-		 * it does become a real issue.
-		 *
-		 * See this thread: http://lkml.org/lkml/2011/7/11/238
-		 */
-		BUG_ON((gfpmask & __GFP_DMA32) &&
-				(page_to_pfn(p) >= 0x00100000UL));
-	}
-
-	return pages;
-
-fail:
-	while (i--)
-		page_cache_release(pages[i]);
-
-	drm_free_large(pages);
-	return ERR_CAST(p);
-}
-
-/**
- * drm_gem_put_pages - helper to free backing pages for a GEM object
- * @obj: obj in question
- * @pages: pages to free
- */
-void _drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
-		bool dirty, bool accessed)
-{
-	int i, npages;
-
-	npages = obj->size >> PAGE_SHIFT;
-
-	for (i = 0; i < npages; i++) {
-		if (dirty)
-			set_page_dirty(pages[i]);
-
-		if (accessed)
-			mark_page_accessed(pages[i]);
-
-		/* Undo the reference we took when populating the table */
-		page_cache_release(pages[i]);
-	}
-
-	drm_free_large(pages);
-}
-
-int
-_drm_gem_create_mmap_offset_size(struct drm_gem_object *obj, size_t size)
-{
-	struct drm_device *dev = obj->dev;
-	struct drm_gem_mm *mm = dev->mm_private;
-	struct drm_map_list *list;
-	struct drm_local_map *map;
-	int ret = 0;
-
-	/* Set the object up for mmap'ing */
-	list = &obj->map_list;
-	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
-	if (!list->map)
-		return -ENOMEM;
-
-	map = list->map;
-	map->type = _DRM_GEM;
-	map->size = size;
-	map->handle = obj;
-
-	/* Get a DRM GEM mmap offset allocated... */
-	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
-			size / PAGE_SIZE, 0, 0);
-
-	if (!list->file_offset_node) {
-		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
-		ret = -ENOSPC;
-		goto out_free_list;
-	}
-
-	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
-			size / PAGE_SIZE, 0);
-	if (!list->file_offset_node) {
-		ret = -ENOMEM;
-		goto out_free_list;
-	}
-
-	list->hash.key = list->file_offset_node->start;
-	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
-	if (ret) {
-		DRM_ERROR("failed to add to map hash\n");
-		goto out_free_mm;
-	}
-
-	return 0;
-
-out_free_mm:
-	drm_mm_put_block(list->file_offset_node);
-out_free_list:
-	kfree(list->map);
-	list->map = NULL;
-
-	return ret;
-}
diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c
index 8d225d7..046d5e6 100644
--- a/drivers/gpu/drm/omapdrm/omap_plane.c
+++ b/drivers/gpu/drm/omapdrm/omap_plane.c
@@ -17,7 +17,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/kfifo.h>
+#include "drm_flip_work.h"
 
 #include "omap_drv.h"
 #include "omap_dmm_tiler.h"
@@ -58,26 +58,23 @@
 
 	struct omap_drm_irq error_irq;
 
-	/* set of bo's pending unpin until next post_apply() */
-	DECLARE_KFIFO_PTR(unpin_fifo, struct drm_gem_object *);
+	/* for deferring bo unpin's until next post_apply(): */
+	struct drm_flip_work unpin_work;
 
 	// XXX maybe get rid of this and handle vblank in crtc too?
 	struct callback apply_done_cb;
 };
 
-static void unpin(void *arg, struct drm_gem_object *bo)
+static void unpin_worker(struct drm_flip_work *work, void *val)
 {
-	struct drm_plane *plane = arg;
-	struct omap_plane *omap_plane = to_omap_plane(plane);
+	struct omap_plane *omap_plane =
+			container_of(work, struct omap_plane, unpin_work);
+	struct drm_device *dev = omap_plane->base.dev;
 
-	if (kfifo_put(&omap_plane->unpin_fifo,
-			(const struct drm_gem_object **)&bo)) {
-		/* also hold a ref so it isn't free'd while pinned */
-		drm_gem_object_reference(bo);
-	} else {
-		dev_err(plane->dev->dev, "unpin fifo full!\n");
-		omap_gem_put_paddr(bo);
-	}
+	omap_framebuffer_unpin(val);
+	mutex_lock(&dev->mode_config.mutex);
+	drm_framebuffer_unreference(val);
+	mutex_unlock(&dev->mode_config.mutex);
 }
 
 /* update which fb (if any) is pinned for scanout */
@@ -87,23 +84,22 @@
 	struct drm_framebuffer *pinned_fb = omap_plane->pinned_fb;
 
 	if (pinned_fb != fb) {
-		int ret;
+		int ret = 0;
 
 		DBG("%p -> %p", pinned_fb, fb);
 
-		if (fb)
+		if (fb) {
 			drm_framebuffer_reference(fb);
-
-		ret = omap_framebuffer_replace(pinned_fb, fb, plane, unpin);
+			ret = omap_framebuffer_pin(fb);
+		}
 
 		if (pinned_fb)
-			drm_framebuffer_unreference(pinned_fb);
+			drm_flip_work_queue(&omap_plane->unpin_work, pinned_fb);
 
 		if (ret) {
 			dev_err(plane->dev->dev, "could not swap %p -> %p\n",
 					omap_plane->pinned_fb, fb);
-			if (fb)
-				drm_framebuffer_unreference(fb);
+			drm_framebuffer_unreference(fb);
 			omap_plane->pinned_fb = NULL;
 			return ret;
 		}
@@ -170,17 +166,14 @@
 	struct omap_plane *omap_plane =
 			container_of(apply, struct omap_plane, apply);
 	struct drm_plane *plane = &omap_plane->base;
+	struct omap_drm_private *priv = plane->dev->dev_private;
 	struct omap_overlay_info *info = &omap_plane->info;
-	struct drm_gem_object *bo = NULL;
 	struct callback cb;
 
 	cb = omap_plane->apply_done_cb;
 	omap_plane->apply_done_cb.fxn = NULL;
 
-	while (kfifo_get(&omap_plane->unpin_fifo, &bo)) {
-		omap_gem_put_paddr(bo);
-		drm_gem_object_unreference_unlocked(bo);
-	}
+	drm_flip_work_commit(&omap_plane->unpin_work, priv->wq);
 
 	if (cb.fxn)
 		cb.fxn(cb.arg);
@@ -277,8 +270,7 @@
 	omap_plane_disable(plane);
 	drm_plane_cleanup(plane);
 
-	WARN_ON(!kfifo_is_empty(&omap_plane->unpin_fifo));
-	kfifo_free(&omap_plane->unpin_fifo);
+	drm_flip_work_cleanup(&omap_plane->unpin_work);
 
 	kfree(omap_plane);
 }
@@ -399,7 +391,8 @@
 	if (!omap_plane)
 		goto fail;
 
-	ret = kfifo_alloc(&omap_plane->unpin_fifo, 16, GFP_KERNEL);
+	ret = drm_flip_work_init(&omap_plane->unpin_work, 16,
+			"unpin", unpin_worker);
 	if (ret) {
 		dev_err(dev->dev, "could not allocate unpin FIFO\n");
 		goto fail;
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 93c2f2c..eb89653 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -179,9 +179,10 @@
 			      uint32_t type, bool interruptible)
 {
 	struct qxl_command cmd;
+	struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
 
 	cmd.type = type;
-	cmd.data = qxl_bo_physical_address(qdev, release->bos[0], release->release_offset);
+	cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset);
 
 	return qxl_ring_push(qdev->command_ring, &cmd, interruptible);
 }
@@ -191,9 +192,10 @@
 			     uint32_t type, bool interruptible)
 {
 	struct qxl_command cmd;
+	struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
 
 	cmd.type = type;
-	cmd.data = qxl_bo_physical_address(qdev, release->bos[0], release->release_offset);
+	cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset);
 
 	return qxl_ring_push(qdev->cursor_ring, &cmd, interruptible);
 }
@@ -214,7 +216,6 @@
 	struct qxl_release *release;
 	uint64_t id, next_id;
 	int i = 0;
-	int ret;
 	union qxl_release_info *info;
 
 	while (qxl_ring_pop(qdev->release_ring, &id)) {
@@ -224,17 +225,10 @@
 			if (release == NULL)
 				break;
 
-			ret = qxl_release_reserve(qdev, release, false);
-			if (ret) {
-				qxl_io_log(qdev, "failed to reserve release on garbage collect %lld\n", id);
-				DRM_ERROR("failed to reserve release %lld\n", id);
-			}
-
 			info = qxl_release_map(qdev, release);
 			next_id = info->next;
 			qxl_release_unmap(qdev, release, info);
 
-			qxl_release_unreserve(qdev, release);
 			QXL_INFO(qdev, "popped %lld, next %lld\n", id,
 				next_id);
 
@@ -259,27 +253,29 @@
 	return i;
 }
 
-int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size,
+int qxl_alloc_bo_reserved(struct qxl_device *qdev,
+			  struct qxl_release *release,
+			  unsigned long size,
 			  struct qxl_bo **_bo)
 {
 	struct qxl_bo *bo;
 	int ret;
 
 	ret = qxl_bo_create(qdev, size, false /* not kernel - device */,
-			    QXL_GEM_DOMAIN_VRAM, NULL, &bo);
+			    false, QXL_GEM_DOMAIN_VRAM, NULL, &bo);
 	if (ret) {
 		DRM_ERROR("failed to allocate VRAM BO\n");
 		return ret;
 	}
-	ret = qxl_bo_reserve(bo, false);
-	if (unlikely(ret != 0))
+	ret = qxl_release_list_add(release, bo);
+	if (ret)
 		goto out_unref;
 
 	*_bo = bo;
 	return 0;
 out_unref:
 	qxl_bo_unref(&bo);
-	return 0;
+	return ret;
 }
 
 static int wait_for_io_cmd_user(struct qxl_device *qdev, uint8_t val, long port, bool intr)
@@ -503,6 +499,10 @@
 	if (ret)
 		return ret;
 
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		return ret;
+
 	cmd = (struct qxl_surface_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_SURFACE_CMD_CREATE;
 	cmd->u.surface_create.format = surf->surf.format;
@@ -524,14 +524,11 @@
 
 	surf->surf_create = release;
 
-	/* no need to add a release to the fence for this bo,
+	/* no need to add a release to the fence for this surface bo,
 	   since it is only released when we ask to destroy the surface
 	   and it would never signal otherwise */
-	qxl_fence_releaseable(qdev, release);
-
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false);
-
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
 
 	surf->hw_surf_alloc = true;
 	spin_lock(&qdev->surf_id_idr_lock);
@@ -573,12 +570,9 @@
 	cmd->surface_id = id;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
-	qxl_fence_releaseable(qdev, release);
-
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_SURFACE, false);
 
-	qxl_release_unreserve(qdev, release);
-
+	qxl_release_fence_buffer_objects(release);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index f76f5dd..835caba 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -179,7 +179,7 @@
 	kfree(qxl_crtc);
 }
 
-static void
+static int
 qxl_hide_cursor(struct qxl_device *qdev)
 {
 	struct qxl_release *release;
@@ -188,14 +188,22 @@
 
 	ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), QXL_RELEASE_CURSOR_CMD,
 					 &release, NULL);
+	if (ret)
+		return ret;
+
+	ret = qxl_release_reserve_list(release, true);
+	if (ret) {
+		qxl_release_free(qdev, release);
+		return ret;
+	}
 
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_HIDE;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+	return 0;
 }
 
 static int qxl_crtc_cursor_set2(struct drm_crtc *crtc,
@@ -216,10 +224,8 @@
 
 	int size = 64*64*4;
 	int ret = 0;
-	if (!handle) {
-		qxl_hide_cursor(qdev);
-		return 0;
-	}
+	if (!handle)
+		return qxl_hide_cursor(qdev);
 
 	obj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
 	if (!obj) {
@@ -234,8 +240,9 @@
 		goto out_unref;
 
 	ret = qxl_bo_pin(user_bo, QXL_GEM_DOMAIN_CPU, NULL);
+	qxl_bo_unreserve(user_bo);
 	if (ret)
-		goto out_unreserve;
+		goto out_unref;
 
 	ret = qxl_bo_kmap(user_bo, &user_ptr);
 	if (ret)
@@ -246,14 +253,20 @@
 					 &release, NULL);
 	if (ret)
 		goto out_kunmap;
-	ret = qxl_alloc_bo_reserved(qdev, sizeof(struct qxl_cursor) + size,
-				    &cursor_bo);
+
+	ret = qxl_alloc_bo_reserved(qdev, release, sizeof(struct qxl_cursor) + size,
+			   &cursor_bo);
 	if (ret)
 		goto out_free_release;
-	ret = qxl_bo_kmap(cursor_bo, (void **)&cursor);
+
+	ret = qxl_release_reserve_list(release, false);
 	if (ret)
 		goto out_free_bo;
 
+	ret = qxl_bo_kmap(cursor_bo, (void **)&cursor);
+	if (ret)
+		goto out_backoff;
+
 	cursor->header.unique = 0;
 	cursor->header.type = SPICE_CURSOR_TYPE_ALPHA;
 	cursor->header.width = 64;
@@ -269,11 +282,7 @@
 
 	qxl_bo_kunmap(cursor_bo);
 
-	/* finish with the userspace bo */
 	qxl_bo_kunmap(user_bo);
-	qxl_bo_unpin(user_bo);
-	qxl_bo_unreserve(user_bo);
-	drm_gem_object_unreference_unlocked(obj);
 
 	cmd = (struct qxl_cursor_cmd *)qxl_release_map(qdev, release);
 	cmd->type = QXL_CURSOR_SET;
@@ -281,30 +290,35 @@
 	cmd->u.set.position.y = qcrtc->cur_y;
 
 	cmd->u.set.shape = qxl_bo_physical_address(qdev, cursor_bo, 0);
-	qxl_release_add_res(qdev, release, cursor_bo);
 
 	cmd->u.set.visible = 1;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
 
-	qxl_bo_unreserve(cursor_bo);
+	/* finish with the userspace bo */
+	ret = qxl_bo_reserve(user_bo, false);
+	if (!ret) {
+		qxl_bo_unpin(user_bo);
+		qxl_bo_unreserve(user_bo);
+	}
+	drm_gem_object_unreference_unlocked(obj);
+
 	qxl_bo_unref(&cursor_bo);
 
 	return ret;
+
+out_backoff:
+	qxl_release_backoff_reserve_list(release);
 out_free_bo:
 	qxl_bo_unref(&cursor_bo);
 out_free_release:
-	qxl_release_unreserve(qdev, release);
 	qxl_release_free(qdev, release);
 out_kunmap:
 	qxl_bo_kunmap(user_bo);
 out_unpin:
 	qxl_bo_unpin(user_bo);
-out_unreserve:
-	qxl_bo_unreserve(user_bo);
 out_unref:
 	drm_gem_object_unreference_unlocked(obj);
 	return ret;
@@ -322,6 +336,14 @@
 
 	ret = qxl_alloc_release_reserved(qdev, sizeof(*cmd), QXL_RELEASE_CURSOR_CMD,
 				   &release, NULL);
+	if (ret)
+		return ret;
+
+	ret = qxl_release_reserve_list(release, true);
+	if (ret) {
+		qxl_release_free(qdev, release);
+		return ret;
+	}
 
 	qcrtc->cur_x = x;
 	qcrtc->cur_y = y;
@@ -332,9 +354,9 @@
 	cmd->u.position.y = qcrtc->cur_y;
 	qxl_release_unmap(qdev, release, &cmd->release_info);
 
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c
index 3c8c3db..56e1d63 100644
--- a/drivers/gpu/drm/qxl/qxl_draw.c
+++ b/drivers/gpu/drm/qxl/qxl_draw.c
@@ -23,25 +23,29 @@
 #include "qxl_drv.h"
 #include "qxl_object.h"
 
+static int alloc_clips(struct qxl_device *qdev,
+		       struct qxl_release *release,
+		       unsigned num_clips,
+		       struct qxl_bo **clips_bo)
+{
+	int size = sizeof(struct qxl_clip_rects) + sizeof(struct qxl_rect) * num_clips;
+
+	return qxl_alloc_bo_reserved(qdev, release, size, clips_bo);
+}
+
 /* returns a pointer to the already allocated qxl_rect array inside
  * the qxl_clip_rects. This is *not* the same as the memory allocated
  * on the device, it is offset to qxl_clip_rects.chunk.data */
 static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev,
 					      struct qxl_drawable *drawable,
 					      unsigned num_clips,
-					      struct qxl_bo **clips_bo,
-					      struct qxl_release *release)
+					      struct qxl_bo *clips_bo)
 {
 	struct qxl_clip_rects *dev_clips;
 	int ret;
-	int size = sizeof(*dev_clips) + sizeof(struct qxl_rect) * num_clips;
-	ret = qxl_alloc_bo_reserved(qdev, size, clips_bo);
-	if (ret)
-		return NULL;
 
-	ret = qxl_bo_kmap(*clips_bo, (void **)&dev_clips);
+	ret = qxl_bo_kmap(clips_bo, (void **)&dev_clips);
 	if (ret) {
-		qxl_bo_unref(clips_bo);
 		return NULL;
 	}
 	dev_clips->num_rects = num_clips;
@@ -52,20 +56,34 @@
 }
 
 static int
-make_drawable(struct qxl_device *qdev, int surface, uint8_t type,
-	      const struct qxl_rect *rect,
-	      struct qxl_release **release)
+alloc_drawable(struct qxl_device *qdev, struct qxl_release **release)
 {
-	struct qxl_drawable *drawable;
-	int i, ret;
-
-	ret = qxl_alloc_release_reserved(qdev, sizeof(*drawable),
+	int ret;
+	ret = qxl_alloc_release_reserved(qdev, sizeof(struct qxl_drawable),
 					 QXL_RELEASE_DRAWABLE, release,
 					 NULL);
-	if (ret)
-		return ret;
+	return ret;
+}
 
-	drawable = (struct qxl_drawable *)qxl_release_map(qdev, *release);
+static void
+free_drawable(struct qxl_device *qdev, struct qxl_release *release)
+{
+	qxl_release_free(qdev, release);
+}
+
+/* release needs to be reserved at this point */
+static int
+make_drawable(struct qxl_device *qdev, int surface, uint8_t type,
+	      const struct qxl_rect *rect,
+	      struct qxl_release *release)
+{
+	struct qxl_drawable *drawable;
+	int i;
+
+	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
+	if (!drawable)
+		return -ENOMEM;
+
 	drawable->type = type;
 
 	drawable->surface_id = surface;		/* Only primary for now */
@@ -91,14 +109,23 @@
 		drawable->bbox = *rect;
 
 	drawable->mm_time = qdev->rom->mm_clock;
-	qxl_release_unmap(qdev, *release, &drawable->release_info);
+	qxl_release_unmap(qdev, release, &drawable->release_info);
 	return 0;
 }
 
-static int qxl_palette_create_1bit(struct qxl_bo **palette_bo,
+static int alloc_palette_object(struct qxl_device *qdev,
+				struct qxl_release *release,
+				struct qxl_bo **palette_bo)
+{
+	return qxl_alloc_bo_reserved(qdev, release,
+				     sizeof(struct qxl_palette) + sizeof(uint32_t) * 2,
+				     palette_bo);
+}
+
+static int qxl_palette_create_1bit(struct qxl_bo *palette_bo,
+				   struct qxl_release *release,
 				   const struct qxl_fb_image *qxl_fb_image)
 {
-	struct qxl_device *qdev = qxl_fb_image->qdev;
 	const struct fb_image *fb_image = &qxl_fb_image->fb_image;
 	uint32_t visual = qxl_fb_image->visual;
 	const uint32_t *pseudo_palette = qxl_fb_image->pseudo_palette;
@@ -108,12 +135,7 @@
 	static uint64_t unique; /* we make no attempt to actually set this
 				 * correctly globaly, since that would require
 				 * tracking all of our palettes. */
-
-	ret = qxl_alloc_bo_reserved(qdev,
-				    sizeof(struct qxl_palette) + sizeof(uint32_t) * 2,
-				    palette_bo);
-
-	ret = qxl_bo_kmap(*palette_bo, (void **)&pal);
+	ret = qxl_bo_kmap(palette_bo, (void **)&pal);
 	pal->num_ents = 2;
 	pal->unique = unique++;
 	if (visual == FB_VISUAL_TRUECOLOR || visual == FB_VISUAL_DIRECTCOLOR) {
@@ -126,7 +148,7 @@
 	}
 	pal->ents[0] = bgcolor;
 	pal->ents[1] = fgcolor;
-	qxl_bo_kunmap(*palette_bo);
+	qxl_bo_kunmap(palette_bo);
 	return 0;
 }
 
@@ -144,44 +166,63 @@
 	const char *src = fb_image->data;
 	int depth = fb_image->depth;
 	struct qxl_release *release;
-	struct qxl_bo *image_bo;
 	struct qxl_image *image;
 	int ret;
-
+	struct qxl_drm_image *dimage;
+	struct qxl_bo *palette_bo = NULL;
 	if (stride == 0)
 		stride = depth * width / 8;
 
+	ret = alloc_drawable(qdev, &release);
+	if (ret)
+		return;
+
+	ret = qxl_image_alloc_objects(qdev, release,
+				      &dimage,
+				      height, stride);
+	if (ret)
+		goto out_free_drawable;
+
+	if (depth == 1) {
+		ret = alloc_palette_object(qdev, release, &palette_bo);
+		if (ret)
+			goto out_free_image;
+	}
+
+	/* do a reservation run over all the objects we just allocated */
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		goto out_free_palette;
+
 	rect.left = x;
 	rect.right = x + width;
 	rect.top = y;
 	rect.bottom = y + height;
 
-	ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &rect, &release);
-	if (ret)
-		return;
-
-	ret = qxl_image_create(qdev, release, &image_bo,
-			       (const uint8_t *)src, 0, 0,
-			       width, height, depth, stride);
+	ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &rect, release);
 	if (ret) {
-		qxl_release_unreserve(qdev, release);
+		qxl_release_backoff_reserve_list(release);
+		goto out_free_palette;
+	}
+
+	ret = qxl_image_init(qdev, release, dimage,
+			     (const uint8_t *)src, 0, 0,
+			     width, height, depth, stride);
+	if (ret) {
+		qxl_release_backoff_reserve_list(release);
 		qxl_release_free(qdev, release);
 		return;
 	}
 
 	if (depth == 1) {
-		struct qxl_bo *palette_bo;
 		void *ptr;
-		ret = qxl_palette_create_1bit(&palette_bo, qxl_fb_image);
-		qxl_release_add_res(qdev, release, palette_bo);
+		ret = qxl_palette_create_1bit(palette_bo, release, qxl_fb_image);
 
-		ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0);
+		ptr = qxl_bo_kmap_atomic_page(qdev, dimage->bo, 0);
 		image = ptr;
 		image->u.bitmap.palette =
 			qxl_bo_physical_address(qdev, palette_bo, 0);
-		qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr);
-		qxl_bo_unreserve(palette_bo);
-		qxl_bo_unref(&palette_bo);
+		qxl_bo_kunmap_atomic_page(qdev, dimage->bo, ptr);
 	}
 
 	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
@@ -199,16 +240,20 @@
 	drawable->u.copy.mask.bitmap = 0;
 
 	drawable->u.copy.src_bitmap =
-		qxl_bo_physical_address(qdev, image_bo, 0);
+		qxl_bo_physical_address(qdev, dimage->bo, 0);
 	qxl_release_unmap(qdev, release, &drawable->release_info);
 
-	qxl_release_add_res(qdev, release, image_bo);
-	qxl_bo_unreserve(image_bo);
-	qxl_bo_unref(&image_bo);
-
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
+out_free_palette:
+	if (palette_bo)
+		qxl_bo_unref(&palette_bo);
+out_free_image:
+	qxl_image_free_objects(qdev, dimage);
+out_free_drawable:
+	if (ret)
+		free_drawable(qdev, release);
 }
 
 /* push a draw command using the given clipping rectangles as
@@ -243,10 +288,14 @@
 	int depth = qxl_fb->base.bits_per_pixel;
 	uint8_t *surface_base;
 	struct qxl_release *release;
-	struct qxl_bo *image_bo;
 	struct qxl_bo *clips_bo;
+	struct qxl_drm_image *dimage;
 	int ret;
 
+	ret = alloc_drawable(qdev, &release);
+	if (ret)
+		return;
+
 	left = clips->x1;
 	right = clips->x2;
 	top = clips->y1;
@@ -263,36 +312,52 @@
 
 	width = right - left;
 	height = bottom - top;
+
+	ret = alloc_clips(qdev, release, num_clips, &clips_bo);
+	if (ret)
+		goto out_free_drawable;
+
+	ret = qxl_image_alloc_objects(qdev, release,
+				      &dimage,
+				      height, stride);
+	if (ret)
+		goto out_free_clips;
+
+	/* do a reservation run over all the objects we just allocated */
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		goto out_free_image;
+
 	drawable_rect.left = left;
 	drawable_rect.right = right;
 	drawable_rect.top = top;
 	drawable_rect.bottom = bottom;
+
 	ret = make_drawable(qdev, 0, QXL_DRAW_COPY, &drawable_rect,
-			    &release);
+			    release);
 	if (ret)
-		return;
+		goto out_release_backoff;
 
 	ret = qxl_bo_kmap(bo, (void **)&surface_base);
 	if (ret)
-		goto out_unref;
+		goto out_release_backoff;
 
-	ret = qxl_image_create(qdev, release, &image_bo, surface_base,
-			       left, top, width, height, depth, stride);
+
+	ret = qxl_image_init(qdev, release, dimage, surface_base,
+			     left, top, width, height, depth, stride);
 	qxl_bo_kunmap(bo);
 	if (ret)
-		goto out_unref;
+		goto out_release_backoff;
 
-	rects = drawable_set_clipping(qdev, drawable, num_clips, &clips_bo, release);
-	if (!rects) {
-		qxl_bo_unref(&image_bo);
-		goto out_unref;
-	}
+	rects = drawable_set_clipping(qdev, drawable, num_clips, clips_bo);
+	if (!rects)
+		goto out_release_backoff;
+
 	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
 
 	drawable->clip.type = SPICE_CLIP_TYPE_RECTS;
 	drawable->clip.data = qxl_bo_physical_address(qdev,
 						      clips_bo, 0);
-	qxl_release_add_res(qdev, release, clips_bo);
 
 	drawable->u.copy.src_area.top = 0;
 	drawable->u.copy.src_area.bottom = height;
@@ -306,11 +371,9 @@
 	drawable->u.copy.mask.pos.y = 0;
 	drawable->u.copy.mask.bitmap = 0;
 
-	drawable->u.copy.src_bitmap = qxl_bo_physical_address(qdev, image_bo, 0);
+	drawable->u.copy.src_bitmap = qxl_bo_physical_address(qdev, dimage->bo, 0);
 	qxl_release_unmap(qdev, release, &drawable->release_info);
-	qxl_release_add_res(qdev, release, image_bo);
-	qxl_bo_unreserve(image_bo);
-	qxl_bo_unref(&image_bo);
+
 	clips_ptr = clips;
 	for (i = 0; i < num_clips; i++, clips_ptr += inc) {
 		rects[i].left   = clips_ptr->x1;
@@ -319,17 +382,22 @@
 		rects[i].bottom = clips_ptr->y2;
 	}
 	qxl_bo_kunmap(clips_bo);
-	qxl_bo_unreserve(clips_bo);
-	qxl_bo_unref(&clips_bo);
 
-	qxl_fence_releaseable(qdev, release);
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false);
-	qxl_release_unreserve(qdev, release);
-	return;
+	qxl_release_fence_buffer_objects(release);
 
-out_unref:
-	qxl_release_unreserve(qdev, release);
-	qxl_release_free(qdev, release);
+out_release_backoff:
+	if (ret)
+		qxl_release_backoff_reserve_list(release);
+out_free_image:
+	qxl_image_free_objects(qdev, dimage);
+out_free_clips:
+	qxl_bo_unref(&clips_bo);
+out_free_drawable:
+	/* only free drawable on error */
+	if (ret)
+		free_drawable(qdev, release);
+
 }
 
 void qxl_draw_copyarea(struct qxl_device *qdev,
@@ -342,22 +410,36 @@
 	struct qxl_release *release;
 	int ret;
 
+	ret = alloc_drawable(qdev, &release);
+	if (ret)
+		return;
+
+	/* do a reservation run over all the objects we just allocated */
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		goto out_free_release;
+
 	rect.left = dx;
 	rect.top = dy;
 	rect.right = dx + width;
 	rect.bottom = dy + height;
-	ret = make_drawable(qdev, 0, QXL_COPY_BITS, &rect, &release);
-	if (ret)
-		return;
+	ret = make_drawable(qdev, 0, QXL_COPY_BITS, &rect, release);
+	if (ret) {
+		qxl_release_backoff_reserve_list(release);
+		goto out_free_release;
+	}
 
 	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
 	drawable->u.copy_bits.src_pos.x = sx;
 	drawable->u.copy_bits.src_pos.y = sy;
-
 	qxl_release_unmap(qdev, release, &drawable->release_info);
-	qxl_fence_releaseable(qdev, release);
+
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
+out_free_release:
+	if (ret)
+		free_drawable(qdev, release);
 }
 
 void qxl_draw_fill(struct qxl_draw_fill *qxl_draw_fill_rec)
@@ -370,10 +452,21 @@
 	struct qxl_release *release;
 	int ret;
 
-	ret = make_drawable(qdev, 0, QXL_DRAW_FILL, &rect, &release);
+	ret = alloc_drawable(qdev, &release);
 	if (ret)
 		return;
 
+	/* do a reservation run over all the objects we just allocated */
+	ret = qxl_release_reserve_list(release, true);
+	if (ret)
+		goto out_free_release;
+
+	ret = make_drawable(qdev, 0, QXL_DRAW_FILL, &rect, release);
+	if (ret) {
+		qxl_release_backoff_reserve_list(release);
+		goto out_free_release;
+	}
+
 	drawable = (struct qxl_drawable *)qxl_release_map(qdev, release);
 	drawable->u.fill.brush.type = SPICE_BRUSH_TYPE_SOLID;
 	drawable->u.fill.brush.u.color = color;
@@ -384,7 +477,11 @@
 	drawable->u.fill.mask.bitmap = 0;
 
 	qxl_release_unmap(qdev, release, &drawable->release_info);
-	qxl_fence_releaseable(qdev, release);
+
 	qxl_push_command_ring_release(qdev, release, QXL_CMD_DRAW, false);
-	qxl_release_unreserve(qdev, release);
+	qxl_release_fence_buffer_objects(release);
+
+out_free_release:
+	if (ret)
+		free_drawable(qdev, release);
 }
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index df0b577..514118a 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -84,7 +84,6 @@
 	.release = drm_release,
 	.unlocked_ioctl = drm_ioctl,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 	.mmap = qxl_mmap,
 };
 
@@ -221,7 +220,7 @@
 
 	.dumb_create = qxl_mode_dumb_create,
 	.dumb_map_offset = qxl_mode_dumb_mmap,
-	.dumb_destroy = qxl_mode_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = qxl_debugfs_init,
 	.debugfs_cleanup = qxl_debugfs_takedown,
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index aacb791..f7c9add 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -42,6 +42,9 @@
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
 
+/* just for ttm_validate_buffer */
+#include <ttm/ttm_execbuf_util.h>
+
 #include <drm/qxl_drm.h>
 #include "qxl_dev.h"
 
@@ -118,9 +121,9 @@
 	uint32_t surface_id;
 	struct qxl_fence fence; /* per bo fence  - list of releases */
 	struct qxl_release *surf_create;
-	atomic_t reserve_count;
 };
 #define gem_to_qxl_bo(gobj) container_of((gobj), struct qxl_bo, gem_base)
+#define to_qxl_bo(tobj) container_of((tobj), struct qxl_bo, tbo)
 
 struct qxl_gem {
 	struct mutex		mutex;
@@ -128,12 +131,7 @@
 };
 
 struct qxl_bo_list {
-	struct list_head lhead;
-	struct qxl_bo *bo;
-};
-
-struct qxl_reloc_list {
-	struct list_head bos;
+	struct ttm_validate_buffer tv;
 };
 
 struct qxl_crtc {
@@ -195,10 +193,20 @@
 struct qxl_release {
 	int id;
 	int type;
-	int bo_count;
 	uint32_t release_offset;
 	uint32_t surface_release_id;
-	struct qxl_bo *bos[QXL_MAX_RES];
+	struct ww_acquire_ctx ticket;
+	struct list_head bos;
+};
+
+struct qxl_drm_chunk {
+	struct list_head head;
+	struct qxl_bo *bo;
+};
+
+struct qxl_drm_image {
+	struct qxl_bo *bo;
+	struct list_head chunk_list;
 };
 
 struct qxl_fb_image {
@@ -314,12 +322,13 @@
 	struct workqueue_struct *gc_queue;
 	struct work_struct gc_work;
 
+	struct work_struct fb_work;
 };
 
 /* forward declaration for QXL_INFO_IO */
 void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...);
 
-extern struct drm_ioctl_desc qxl_ioctls[];
+extern const struct drm_ioctl_desc qxl_ioctls[];
 extern int qxl_max_ioctl;
 
 int qxl_driver_load(struct drm_device *dev, unsigned long flags);
@@ -396,9 +405,6 @@
 			  bool discardable, bool kernel,
 			  struct qxl_surface *surf,
 			  struct drm_gem_object **obj);
-int qxl_gem_object_pin(struct drm_gem_object *obj, uint32_t pin_domain,
-			  uint64_t *gpu_addr);
-void qxl_gem_object_unpin(struct drm_gem_object *obj);
 int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
 				      struct drm_file *file_priv,
 				      u32 domain,
@@ -418,9 +424,6 @@
 int qxl_mode_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
-int qxl_mode_dumb_destroy(struct drm_file *file_priv,
-			  struct drm_device *dev,
-			  uint32_t handle);
 int qxl_mode_dumb_mmap(struct drm_file *filp,
 		       struct drm_device *dev,
 		       uint32_t handle, uint64_t *offset_p);
@@ -433,12 +436,19 @@
 
 /* qxl image */
 
-int qxl_image_create(struct qxl_device *qdev,
-		     struct qxl_release *release,
-		     struct qxl_bo **image_bo,
-		     const uint8_t *data,
-		     int x, int y, int width, int height,
-		     int depth, int stride);
+int qxl_image_init(struct qxl_device *qdev,
+		   struct qxl_release *release,
+		   struct qxl_drm_image *dimage,
+		   const uint8_t *data,
+		   int x, int y, int width, int height,
+		   int depth, int stride);
+int
+qxl_image_alloc_objects(struct qxl_device *qdev,
+			struct qxl_release *release,
+			struct qxl_drm_image **image_ptr,
+			int height, int stride);
+void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage);
+
 void qxl_update_screen(struct qxl_device *qxl);
 
 /* qxl io operations (qxl_cmd.c) */
@@ -459,20 +469,15 @@
 void qxl_io_flush_release(struct qxl_device *qdev);
 void qxl_io_flush_surfaces(struct qxl_device *qdev);
 
-int qxl_release_reserve(struct qxl_device *qdev,
-			struct qxl_release *release, bool no_wait);
-void qxl_release_unreserve(struct qxl_device *qdev,
-			   struct qxl_release *release);
 union qxl_release_info *qxl_release_map(struct qxl_device *qdev,
 					struct qxl_release *release);
 void qxl_release_unmap(struct qxl_device *qdev,
 		       struct qxl_release *release,
 		       union qxl_release_info *info);
-/*
- * qxl_bo_add_resource.
- *
- */
-void qxl_bo_add_resource(struct qxl_bo *main_bo, struct qxl_bo *resource);
+int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo);
+int qxl_release_reserve_list(struct qxl_release *release, bool no_intr);
+void qxl_release_backoff_reserve_list(struct qxl_release *release);
+void qxl_release_fence_buffer_objects(struct qxl_release *release);
 
 int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
 				       enum qxl_surface_cmd_type surface_cmd_type,
@@ -481,15 +486,16 @@
 int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
 			       int type, struct qxl_release **release,
 			       struct qxl_bo **rbo);
-int qxl_fence_releaseable(struct qxl_device *qdev,
-			  struct qxl_release *release);
+
 int
 qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *release,
 			      uint32_t type, bool interruptible);
 int
 qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *release,
 			     uint32_t type, bool interruptible);
-int qxl_alloc_bo_reserved(struct qxl_device *qdev, unsigned long size,
+int qxl_alloc_bo_reserved(struct qxl_device *qdev,
+			  struct qxl_release *release,
+			  unsigned long size,
 			  struct qxl_bo **_bo);
 /* qxl drawing commands */
 
@@ -510,15 +516,9 @@
 		       u32 sx, u32 sy,
 		       u32 dx, u32 dy);
 
-uint64_t
-qxl_release_alloc(struct qxl_device *qdev, int type,
-		  struct qxl_release **ret);
-
 void qxl_release_free(struct qxl_device *qdev,
 		      struct qxl_release *release);
-void qxl_release_add_res(struct qxl_device *qdev,
-			 struct qxl_release *release,
-			 struct qxl_bo *bo);
+
 /* used by qxl_debugfs_release */
 struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev,
 						   uint64_t id);
@@ -561,7 +561,7 @@
 int qxl_update_surface(struct qxl_device *qdev, struct qxl_bo *surf);
 
 /* qxl_fence.c */
-int qxl_fence_add_release(struct qxl_fence *qfence, uint32_t rel_id);
+void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id);
 int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id);
 int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence);
 void qxl_fence_fini(struct qxl_fence *qfence);
diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c
index 847c4ee..d34bb41 100644
--- a/drivers/gpu/drm/qxl/qxl_dumb.c
+++ b/drivers/gpu/drm/qxl/qxl_dumb.c
@@ -68,13 +68,6 @@
 	return 0;
 }
 
-int qxl_mode_dumb_destroy(struct drm_file *file_priv,
-			     struct drm_device *dev,
-			     uint32_t handle)
-{
-	return drm_gem_handle_delete(file_priv, handle);
-}
-
 int qxl_mode_dumb_mmap(struct drm_file *file_priv,
 		       struct drm_device *dev,
 		       uint32_t handle, uint64_t *offset_p)
diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index 76f39d8..88722f2 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -37,12 +37,29 @@
 
 #define QXL_DIRTY_DELAY (HZ / 30)
 
+#define QXL_FB_OP_FILLRECT 1
+#define QXL_FB_OP_COPYAREA 2
+#define QXL_FB_OP_IMAGEBLIT 3
+
+struct qxl_fb_op {
+	struct list_head head;
+	int op_type;
+	union {
+		struct fb_fillrect fr;
+		struct fb_copyarea ca;
+		struct fb_image ib;
+	} op;
+	void *img_data;
+};
+
 struct qxl_fbdev {
 	struct drm_fb_helper helper;
 	struct qxl_framebuffer	qfb;
 	struct list_head	fbdev_list;
 	struct qxl_device	*qdev;
 
+	spinlock_t delayed_ops_lock;
+	struct list_head delayed_ops;
 	void *shadow;
 	int size;
 
@@ -164,8 +181,69 @@
 	.deferred_io	= qxl_deferred_io,
 };
 
-static void qxl_fb_fillrect(struct fb_info *info,
-			    const struct fb_fillrect *fb_rect)
+static void qxl_fb_delayed_fillrect(struct qxl_fbdev *qfbdev,
+				    const struct fb_fillrect *fb_rect)
+{
+	struct qxl_fb_op *op;
+	unsigned long flags;
+
+	op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN);
+	if (!op)
+		return;
+
+	op->op.fr = *fb_rect;
+	op->img_data = NULL;
+	op->op_type = QXL_FB_OP_FILLRECT;
+
+	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+	list_add_tail(&op->head, &qfbdev->delayed_ops);
+	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+}
+
+static void qxl_fb_delayed_copyarea(struct qxl_fbdev *qfbdev,
+				    const struct fb_copyarea *fb_copy)
+{
+	struct qxl_fb_op *op;
+	unsigned long flags;
+
+	op = kmalloc(sizeof(struct qxl_fb_op), GFP_ATOMIC | __GFP_NOWARN);
+	if (!op)
+		return;
+
+	op->op.ca = *fb_copy;
+	op->img_data = NULL;
+	op->op_type = QXL_FB_OP_COPYAREA;
+
+	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+	list_add_tail(&op->head, &qfbdev->delayed_ops);
+	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+}
+
+static void qxl_fb_delayed_imageblit(struct qxl_fbdev *qfbdev,
+				     const struct fb_image *fb_image)
+{
+	struct qxl_fb_op *op;
+	unsigned long flags;
+	uint32_t size = fb_image->width * fb_image->height * (fb_image->depth >= 8 ? fb_image->depth / 8 : 1);
+
+	op = kmalloc(sizeof(struct qxl_fb_op) + size, GFP_ATOMIC | __GFP_NOWARN);
+	if (!op)
+		return;
+
+	op->op.ib = *fb_image;
+	op->img_data = (void *)(op + 1);
+	op->op_type = QXL_FB_OP_IMAGEBLIT;
+
+	memcpy(op->img_data, fb_image->data, size);
+
+	op->op.ib.data = op->img_data;
+	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+	list_add_tail(&op->head, &qfbdev->delayed_ops);
+	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+}
+
+static void qxl_fb_fillrect_internal(struct fb_info *info,
+				     const struct fb_fillrect *fb_rect)
 {
 	struct qxl_fbdev *qfbdev = info->par;
 	struct qxl_device *qdev = qfbdev->qdev;
@@ -203,17 +281,28 @@
 	qxl_draw_fill_rec.rect = rect;
 	qxl_draw_fill_rec.color = color;
 	qxl_draw_fill_rec.rop = rop;
-	if (!drm_can_sleep()) {
-		qxl_io_log(qdev,
-			"%s: TODO use RCU, mysterious locks with spin_lock\n",
-			__func__);
-		return;
-	}
+
 	qxl_draw_fill(&qxl_draw_fill_rec);
 }
 
-static void qxl_fb_copyarea(struct fb_info *info,
-			    const struct fb_copyarea *region)
+static void qxl_fb_fillrect(struct fb_info *info,
+			    const struct fb_fillrect *fb_rect)
+{
+	struct qxl_fbdev *qfbdev = info->par;
+	struct qxl_device *qdev = qfbdev->qdev;
+
+	if (!drm_can_sleep()) {
+		qxl_fb_delayed_fillrect(qfbdev, fb_rect);
+		schedule_work(&qdev->fb_work);
+		return;
+	}
+	/* make sure any previous work is done */
+	flush_work(&qdev->fb_work);
+	qxl_fb_fillrect_internal(info, fb_rect);
+}
+
+static void qxl_fb_copyarea_internal(struct fb_info *info,
+				     const struct fb_copyarea *region)
 {
 	struct qxl_fbdev *qfbdev = info->par;
 
@@ -223,37 +312,89 @@
 			  region->dx, region->dy);
 }
 
+static void qxl_fb_copyarea(struct fb_info *info,
+			    const struct fb_copyarea *region)
+{
+	struct qxl_fbdev *qfbdev = info->par;
+	struct qxl_device *qdev = qfbdev->qdev;
+
+	if (!drm_can_sleep()) {
+		qxl_fb_delayed_copyarea(qfbdev, region);
+		schedule_work(&qdev->fb_work);
+		return;
+	}
+	/* make sure any previous work is done */
+	flush_work(&qdev->fb_work);
+	qxl_fb_copyarea_internal(info, region);
+}
+
 static void qxl_fb_imageblit_safe(struct qxl_fb_image *qxl_fb_image)
 {
 	qxl_draw_opaque_fb(qxl_fb_image, 0);
 }
 
+static void qxl_fb_imageblit_internal(struct fb_info *info,
+				      const struct fb_image *image)
+{
+	struct qxl_fbdev *qfbdev = info->par;
+	struct qxl_fb_image qxl_fb_image;
+
+	/* ensure proper order  rendering operations - TODO: must do this
+	 * for everything. */
+	qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image);
+	qxl_fb_imageblit_safe(&qxl_fb_image);
+}
+
 static void qxl_fb_imageblit(struct fb_info *info,
 			     const struct fb_image *image)
 {
 	struct qxl_fbdev *qfbdev = info->par;
 	struct qxl_device *qdev = qfbdev->qdev;
-	struct qxl_fb_image qxl_fb_image;
 
 	if (!drm_can_sleep()) {
-		/* we cannot do any ttm_bo allocation since that will fail on
-		 * ioremap_wc..__get_vm_area_node, so queue the work item
-		 * instead This can happen from printk inside an interrupt
-		 * context, i.e.: smp_apic_timer_interrupt..check_cpu_stall */
-		qxl_io_log(qdev,
-			"%s: TODO use RCU, mysterious locks with spin_lock\n",
-			   __func__);
+		qxl_fb_delayed_imageblit(qfbdev, image);
+		schedule_work(&qdev->fb_work);
 		return;
 	}
+	/* make sure any previous work is done */
+	flush_work(&qdev->fb_work);
+	qxl_fb_imageblit_internal(info, image);
+}
 
-	/* ensure proper order of rendering operations - TODO: must do this
-	 * for everything. */
-	qxl_fb_image_init(&qxl_fb_image, qfbdev->qdev, info, image);
-	qxl_fb_imageblit_safe(&qxl_fb_image);
+static void qxl_fb_work(struct work_struct *work)
+{
+	struct qxl_device *qdev = container_of(work, struct qxl_device, fb_work);
+	unsigned long flags;
+	struct qxl_fb_op *entry, *tmp;
+	struct qxl_fbdev *qfbdev = qdev->mode_info.qfbdev;
+
+	/* since the irq context just adds entries to the end of the
+	   list dropping the lock should be fine, as entry isn't modified
+	   in the operation code */
+	spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+	list_for_each_entry_safe(entry, tmp, &qfbdev->delayed_ops, head) {
+		spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
+		switch (entry->op_type) {
+		case QXL_FB_OP_FILLRECT:
+			qxl_fb_fillrect_internal(qfbdev->helper.fbdev, &entry->op.fr);
+			break;
+		case QXL_FB_OP_COPYAREA:
+			qxl_fb_copyarea_internal(qfbdev->helper.fbdev, &entry->op.ca);
+			break;
+		case QXL_FB_OP_IMAGEBLIT:
+			qxl_fb_imageblit_internal(qfbdev->helper.fbdev, &entry->op.ib);
+			break;
+		}
+		spin_lock_irqsave(&qfbdev->delayed_ops_lock, flags);
+		list_del(&entry->head);
+		kfree(entry);
+	}
+	spin_unlock_irqrestore(&qfbdev->delayed_ops_lock, flags);
 }
 
 int qxl_fb_init(struct qxl_device *qdev)
 {
+	INIT_WORK(&qdev->fb_work, qxl_fb_work);
 	return 0;
 }
 
@@ -536,7 +677,8 @@
 	qfbdev->qdev = qdev;
 	qdev->mode_info.qfbdev = qfbdev;
 	qfbdev->helper.funcs = &qxl_fb_helper_funcs;
-
+	spin_lock_init(&qfbdev->delayed_ops_lock);
+	INIT_LIST_HEAD(&qfbdev->delayed_ops);
 	ret = drm_fb_helper_init(qdev->ddev, &qfbdev->helper,
 				 qxl_num_crtc /* num_crtc - QXL supports just 1 */,
 				 QXLFB_CONN_LIMIT);
diff --git a/drivers/gpu/drm/qxl/qxl_fence.c b/drivers/gpu/drm/qxl/qxl_fence.c
index 63c6715..ae59e91 100644
--- a/drivers/gpu/drm/qxl/qxl_fence.c
+++ b/drivers/gpu/drm/qxl/qxl_fence.c
@@ -49,17 +49,11 @@
 
    For some reason every so often qxl hw fails to release, things go wrong.
 */
-
-
-int qxl_fence_add_release(struct qxl_fence *qfence, uint32_t rel_id)
+/* must be called with the fence lock held */
+void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id)
 {
-	struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence);
-
-	spin_lock(&bo->tbo.bdev->fence_lock);
 	radix_tree_insert(&qfence->tree, rel_id, qfence);
 	qfence->num_active_releases++;
-	spin_unlock(&bo->tbo.bdev->fence_lock);
-	return 0;
 }
 
 int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id)
diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c
index a235693..1648e41 100644
--- a/drivers/gpu/drm/qxl/qxl_gem.c
+++ b/drivers/gpu/drm/qxl/qxl_gem.c
@@ -55,7 +55,7 @@
 	/* At least align on page size */
 	if (alignment < PAGE_SIZE)
 		alignment = PAGE_SIZE;
-	r = qxl_bo_create(qdev, size, kernel, initial_domain, surf, &qbo);
+	r = qxl_bo_create(qdev, size, kernel, false, initial_domain, surf, &qbo);
 	if (r) {
 		if (r != -ERESTARTSYS)
 			DRM_ERROR(
@@ -101,32 +101,6 @@
 	return 0;
 }
 
-int qxl_gem_object_pin(struct drm_gem_object *obj, uint32_t pin_domain,
-			  uint64_t *gpu_addr)
-{
-	struct qxl_bo *qobj = obj->driver_private;
-	int r;
-
-	r = qxl_bo_reserve(qobj, false);
-	if (unlikely(r != 0))
-		return r;
-	r = qxl_bo_pin(qobj, pin_domain, gpu_addr);
-	qxl_bo_unreserve(qobj);
-	return r;
-}
-
-void qxl_gem_object_unpin(struct drm_gem_object *obj)
-{
-	struct qxl_bo *qobj = obj->driver_private;
-	int r;
-
-	r = qxl_bo_reserve(qobj, false);
-	if (likely(r == 0)) {
-		qxl_bo_unpin(qobj);
-		qxl_bo_unreserve(qobj);
-	}
-}
-
 int qxl_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv)
 {
 	return 0;
diff --git a/drivers/gpu/drm/qxl/qxl_image.c b/drivers/gpu/drm/qxl/qxl_image.c
index cf85620..7fbcc35 100644
--- a/drivers/gpu/drm/qxl/qxl_image.c
+++ b/drivers/gpu/drm/qxl/qxl_image.c
@@ -30,31 +30,100 @@
 #include "qxl_object.h"
 
 static int
-qxl_image_create_helper(struct qxl_device *qdev,
-			struct qxl_release *release,
-			struct qxl_bo **image_bo,
-			const uint8_t *data,
-			int width, int height,
-			int depth, unsigned int hash,
-			int stride)
+qxl_allocate_chunk(struct qxl_device *qdev,
+		   struct qxl_release *release,
+		   struct qxl_drm_image *image,
+		   unsigned int chunk_size)
 {
+	struct qxl_drm_chunk *chunk;
+	int ret;
+
+	chunk = kmalloc(sizeof(struct qxl_drm_chunk), GFP_KERNEL);
+	if (!chunk)
+		return -ENOMEM;
+
+	ret = qxl_alloc_bo_reserved(qdev, release, chunk_size, &chunk->bo);
+	if (ret) {
+		kfree(chunk);
+		return ret;
+	}
+
+	list_add_tail(&chunk->head, &image->chunk_list);
+	return 0;
+}
+
+int
+qxl_image_alloc_objects(struct qxl_device *qdev,
+			struct qxl_release *release,
+			struct qxl_drm_image **image_ptr,
+			int height, int stride)
+{
+	struct qxl_drm_image *image;
+	int ret;
+
+	image = kmalloc(sizeof(struct qxl_drm_image), GFP_KERNEL);
+	if (!image)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&image->chunk_list);
+
+	ret = qxl_alloc_bo_reserved(qdev, release, sizeof(struct qxl_image), &image->bo);
+	if (ret) {
+		kfree(image);
+		return ret;
+	}
+
+	ret = qxl_allocate_chunk(qdev, release, image, sizeof(struct qxl_data_chunk) + stride * height);
+	if (ret) {
+		qxl_bo_unref(&image->bo);
+		kfree(image);
+		return ret;
+	}
+	*image_ptr = image;
+	return 0;
+}
+
+void qxl_image_free_objects(struct qxl_device *qdev, struct qxl_drm_image *dimage)
+{
+	struct qxl_drm_chunk *chunk, *tmp;
+
+	list_for_each_entry_safe(chunk, tmp, &dimage->chunk_list, head) {
+		qxl_bo_unref(&chunk->bo);
+		kfree(chunk);
+	}
+
+	qxl_bo_unref(&dimage->bo);
+	kfree(dimage);
+}
+
+static int
+qxl_image_init_helper(struct qxl_device *qdev,
+		      struct qxl_release *release,
+		      struct qxl_drm_image *dimage,
+		      const uint8_t *data,
+		      int width, int height,
+		      int depth, unsigned int hash,
+		      int stride)
+{
+	struct qxl_drm_chunk *drv_chunk;
 	struct qxl_image *image;
 	struct qxl_data_chunk *chunk;
 	int i;
 	int chunk_stride;
 	int linesize = width * depth / 8;
-	struct qxl_bo *chunk_bo;
-	int ret;
+	struct qxl_bo *chunk_bo, *image_bo;
 	void *ptr;
 	/* Chunk */
 	/* FIXME: Check integer overflow */
 	/* TODO: variable number of chunks */
+
+	drv_chunk = list_first_entry(&dimage->chunk_list, struct qxl_drm_chunk, head);
+
+	chunk_bo = drv_chunk->bo;
 	chunk_stride = stride; /* TODO: should use linesize, but it renders
 				  wrong (check the bitmaps are sent correctly
 				  first) */
-	ret = qxl_alloc_bo_reserved(qdev, sizeof(*chunk) + height * chunk_stride,
-				    &chunk_bo);
-	
+
 	ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, 0);
 	chunk = ptr;
 	chunk->data_size = height * chunk_stride;
@@ -102,7 +171,6 @@
 				while (remain > 0) {
 					page_base = out_offset & PAGE_MASK;
 					page_offset = offset_in_page(out_offset);
-					
 					size = min((int)(PAGE_SIZE - page_offset), remain);
 
 					ptr = qxl_bo_kmap_atomic_page(qdev, chunk_bo, page_base);
@@ -116,14 +184,10 @@
 			}
 		}
 	}
-
-
 	qxl_bo_kunmap(chunk_bo);
 
-	/* Image */
-	ret = qxl_alloc_bo_reserved(qdev, sizeof(*image), image_bo);
-
-	ptr = qxl_bo_kmap_atomic_page(qdev, *image_bo, 0);
+	image_bo = dimage->bo;
+	ptr = qxl_bo_kmap_atomic_page(qdev, image_bo, 0);
 	image = ptr;
 
 	image->descriptor.id = 0;
@@ -154,23 +218,20 @@
 	image->u.bitmap.stride = chunk_stride;
 	image->u.bitmap.palette = 0;
 	image->u.bitmap.data = qxl_bo_physical_address(qdev, chunk_bo, 0);
-	qxl_release_add_res(qdev, release, chunk_bo);
-	qxl_bo_unreserve(chunk_bo);
-	qxl_bo_unref(&chunk_bo);
 
-	qxl_bo_kunmap_atomic_page(qdev, *image_bo, ptr);
+	qxl_bo_kunmap_atomic_page(qdev, image_bo, ptr);
 
 	return 0;
 }
 
-int qxl_image_create(struct qxl_device *qdev,
+int qxl_image_init(struct qxl_device *qdev,
 		     struct qxl_release *release,
-		     struct qxl_bo **image_bo,
+		     struct qxl_drm_image *dimage,
 		     const uint8_t *data,
 		     int x, int y, int width, int height,
 		     int depth, int stride)
 {
 	data += y * stride + x * (depth / 8);
-	return qxl_image_create_helper(qdev, release, image_bo, data,
+	return qxl_image_init_helper(qdev, release, dimage, data,
 				       width, height, depth, 0, stride);
 }
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 27f45e4..7b95c75 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -68,55 +68,60 @@
 				  &qxl_map->offset);
 }
 
+struct qxl_reloc_info {
+	int type;
+	struct qxl_bo *dst_bo;
+	uint32_t dst_offset;
+	struct qxl_bo *src_bo;
+	int src_offset;
+};
+
 /*
  * dst must be validated, i.e. whole bo on vram/surfacesram (right now all bo's
  * are on vram).
  * *(dst + dst_off) = qxl_bo_physical_address(src, src_off)
  */
 static void
-apply_reloc(struct qxl_device *qdev, struct qxl_bo *dst, uint64_t dst_off,
-	    struct qxl_bo *src, uint64_t src_off)
+apply_reloc(struct qxl_device *qdev, struct qxl_reloc_info *info)
 {
 	void *reloc_page;
-
-	reloc_page = qxl_bo_kmap_atomic_page(qdev, dst, dst_off & PAGE_MASK);
-	*(uint64_t *)(reloc_page + (dst_off & ~PAGE_MASK)) = qxl_bo_physical_address(qdev,
-								     src, src_off);
-	qxl_bo_kunmap_atomic_page(qdev, dst, reloc_page);
+	reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK);
+	*(uint64_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = qxl_bo_physical_address(qdev,
+											      info->src_bo,
+											      info->src_offset);
+	qxl_bo_kunmap_atomic_page(qdev, info->dst_bo, reloc_page);
 }
 
 static void
-apply_surf_reloc(struct qxl_device *qdev, struct qxl_bo *dst, uint64_t dst_off,
-		 struct qxl_bo *src)
+apply_surf_reloc(struct qxl_device *qdev, struct qxl_reloc_info *info)
 {
 	uint32_t id = 0;
 	void *reloc_page;
 
-	if (src && !src->is_primary)
-		id = src->surface_id;
+	if (info->src_bo && !info->src_bo->is_primary)
+		id = info->src_bo->surface_id;
 
-	reloc_page = qxl_bo_kmap_atomic_page(qdev, dst, dst_off & PAGE_MASK);
-	*(uint32_t *)(reloc_page + (dst_off & ~PAGE_MASK)) = id;
-	qxl_bo_kunmap_atomic_page(qdev, dst, reloc_page);
+	reloc_page = qxl_bo_kmap_atomic_page(qdev, info->dst_bo, info->dst_offset & PAGE_MASK);
+	*(uint32_t *)(reloc_page + (info->dst_offset & ~PAGE_MASK)) = id;
+	qxl_bo_kunmap_atomic_page(qdev, info->dst_bo, reloc_page);
 }
 
 /* return holding the reference to this object */
 static struct qxl_bo *qxlhw_handle_to_bo(struct qxl_device *qdev,
 					 struct drm_file *file_priv, uint64_t handle,
-					 struct qxl_reloc_list *reloc_list)
+					 struct qxl_release *release)
 {
 	struct drm_gem_object *gobj;
 	struct qxl_bo *qobj;
 	int ret;
 
 	gobj = drm_gem_object_lookup(qdev->ddev, file_priv, handle);
-	if (!gobj) {
-		DRM_ERROR("bad bo handle %lld\n", handle);
+	if (!gobj)
 		return NULL;
-	}
+
 	qobj = gem_to_qxl_bo(gobj);
 
-	ret = qxl_bo_list_add(reloc_list, qobj);
+	ret = qxl_release_list_add(release, qobj);
 	if (ret)
 		return NULL;
 
@@ -129,6 +134,155 @@
  * However, the command as passed from user space must *not* contain the initial
  * QXLReleaseInfo struct (first XXX bytes)
  */
+static int qxl_process_single_command(struct qxl_device *qdev,
+				      struct drm_qxl_command *cmd,
+				      struct drm_file *file_priv)
+{
+	struct qxl_reloc_info *reloc_info;
+	int release_type;
+	struct qxl_release *release;
+	struct qxl_bo *cmd_bo;
+	void *fb_cmd;
+	int i, j, ret, num_relocs;
+	int unwritten;
+
+	switch (cmd->type) {
+	case QXL_CMD_DRAW:
+		release_type = QXL_RELEASE_DRAWABLE;
+		break;
+	case QXL_CMD_SURFACE:
+	case QXL_CMD_CURSOR:
+	default:
+		DRM_DEBUG("Only draw commands in execbuffers\n");
+		return -EINVAL;
+		break;
+	}
+
+	if (cmd->command_size > PAGE_SIZE - sizeof(union qxl_release_info))
+		return -EINVAL;
+
+	if (!access_ok(VERIFY_READ,
+		       (void *)(unsigned long)cmd->command,
+		       cmd->command_size))
+		return -EFAULT;
+
+	reloc_info = kmalloc(sizeof(struct qxl_reloc_info) * cmd->relocs_num, GFP_KERNEL);
+	if (!reloc_info)
+		return -ENOMEM;
+
+	ret = qxl_alloc_release_reserved(qdev,
+					 sizeof(union qxl_release_info) +
+					 cmd->command_size,
+					 release_type,
+					 &release,
+					 &cmd_bo);
+	if (ret)
+		goto out_free_reloc;
+
+	/* TODO copy slow path code from i915 */
+	fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE));
+	unwritten = __copy_from_user_inatomic_nocache(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), (void *)(unsigned long)cmd->command, cmd->command_size);
+
+	{
+		struct qxl_drawable *draw = fb_cmd;
+		draw->mm_time = qdev->rom->mm_clock;
+	}
+
+	qxl_bo_kunmap_atomic_page(qdev, cmd_bo, fb_cmd);
+	if (unwritten) {
+		DRM_ERROR("got unwritten %d\n", unwritten);
+		ret = -EFAULT;
+		goto out_free_release;
+	}
+
+	/* fill out reloc info structs */
+	num_relocs = 0;
+	for (i = 0; i < cmd->relocs_num; ++i) {
+		struct drm_qxl_reloc reloc;
+
+		if (DRM_COPY_FROM_USER(&reloc,
+				       &((struct drm_qxl_reloc *)(uintptr_t)cmd->relocs)[i],
+				       sizeof(reloc))) {
+			ret = -EFAULT;
+			goto out_free_bos;
+		}
+
+		/* add the bos to the list of bos to validate -
+		   need to validate first then process relocs? */
+		if (reloc.reloc_type != QXL_RELOC_TYPE_BO && reloc.reloc_type != QXL_RELOC_TYPE_SURF) {
+			DRM_DEBUG("unknown reloc type %d\n", reloc_info[i].type);
+
+			ret = -EINVAL;
+			goto out_free_bos;
+		}
+		reloc_info[i].type = reloc.reloc_type;
+
+		if (reloc.dst_handle) {
+			reloc_info[i].dst_bo = qxlhw_handle_to_bo(qdev, file_priv,
+								  reloc.dst_handle, release);
+			if (!reloc_info[i].dst_bo) {
+				ret = -EINVAL;
+				reloc_info[i].src_bo = NULL;
+				goto out_free_bos;
+			}
+			reloc_info[i].dst_offset = reloc.dst_offset;
+		} else {
+			reloc_info[i].dst_bo = cmd_bo;
+			reloc_info[i].dst_offset = reloc.dst_offset + release->release_offset;
+		}
+		num_relocs++;
+
+		/* reserve and validate the reloc dst bo */
+		if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle > 0) {
+			reloc_info[i].src_bo =
+				qxlhw_handle_to_bo(qdev, file_priv,
+						   reloc.src_handle, release);
+			if (!reloc_info[i].src_bo) {
+				if (reloc_info[i].dst_bo != cmd_bo)
+					drm_gem_object_unreference_unlocked(&reloc_info[i].dst_bo->gem_base);
+				ret = -EINVAL;
+				goto out_free_bos;
+			}
+			reloc_info[i].src_offset = reloc.src_offset;
+		} else {
+			reloc_info[i].src_bo = NULL;
+			reloc_info[i].src_offset = 0;
+		}
+	}
+
+	/* validate all buffers */
+	ret = qxl_release_reserve_list(release, false);
+	if (ret)
+		goto out_free_bos;
+
+	for (i = 0; i < cmd->relocs_num; ++i) {
+		if (reloc_info[i].type == QXL_RELOC_TYPE_BO)
+			apply_reloc(qdev, &reloc_info[i]);
+		else if (reloc_info[i].type == QXL_RELOC_TYPE_SURF)
+			apply_surf_reloc(qdev, &reloc_info[i]);
+	}
+
+	ret = qxl_push_command_ring_release(qdev, release, cmd->type, true);
+	if (ret)
+		qxl_release_backoff_reserve_list(release);
+	else
+		qxl_release_fence_buffer_objects(release);
+
+out_free_bos:
+	for (j = 0; j < num_relocs; j++) {
+		if (reloc_info[j].dst_bo != cmd_bo)
+			drm_gem_object_unreference_unlocked(&reloc_info[j].dst_bo->gem_base);
+		if (reloc_info[j].src_bo && reloc_info[j].src_bo != cmd_bo)
+			drm_gem_object_unreference_unlocked(&reloc_info[j].src_bo->gem_base);
+	}
+out_free_release:
+	if (ret)
+		qxl_release_free(qdev, release);
+out_free_reloc:
+	kfree(reloc_info);
+	return ret;
+}
+
 static int qxl_execbuffer_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv)
 {
@@ -136,144 +290,21 @@
 	struct drm_qxl_execbuffer *execbuffer = data;
 	struct drm_qxl_command user_cmd;
 	int cmd_num;
-	struct qxl_bo *reloc_src_bo;
-	struct qxl_bo *reloc_dst_bo;
-	struct drm_qxl_reloc reloc;
-	void *fb_cmd;
-	int i, ret;
-	struct qxl_reloc_list reloc_list;
-	int unwritten;
-	uint32_t reloc_dst_offset;
-	INIT_LIST_HEAD(&reloc_list.bos);
+	int ret;
 
 	for (cmd_num = 0; cmd_num < execbuffer->commands_num; ++cmd_num) {
-		struct qxl_release *release;
-		struct qxl_bo *cmd_bo;
-		int release_type;
+
 		struct drm_qxl_command *commands =
 			(struct drm_qxl_command *)(uintptr_t)execbuffer->commands;
 
 		if (DRM_COPY_FROM_USER(&user_cmd, &commands[cmd_num],
 				       sizeof(user_cmd)))
 			return -EFAULT;
-		switch (user_cmd.type) {
-		case QXL_CMD_DRAW:
-			release_type = QXL_RELEASE_DRAWABLE;
-			break;
-		case QXL_CMD_SURFACE:
-		case QXL_CMD_CURSOR:
-		default:
-			DRM_DEBUG("Only draw commands in execbuffers\n");
-			return -EINVAL;
-			break;
-		}
 
-		if (user_cmd.command_size > PAGE_SIZE - sizeof(union qxl_release_info))
-			return -EINVAL;
-
-		if (!access_ok(VERIFY_READ,
-			       (void *)(unsigned long)user_cmd.command,
-			       user_cmd.command_size))
-			return -EFAULT;
-
-		ret = qxl_alloc_release_reserved(qdev,
-						 sizeof(union qxl_release_info) +
-						 user_cmd.command_size,
-						 release_type,
-						 &release,
-						 &cmd_bo);
+		ret = qxl_process_single_command(qdev, &user_cmd, file_priv);
 		if (ret)
 			return ret;
-
-		/* TODO copy slow path code from i915 */
-		fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE));
-		unwritten = __copy_from_user_inatomic_nocache(fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), (void *)(unsigned long)user_cmd.command, user_cmd.command_size);
-
-		{
-			struct qxl_drawable *draw = fb_cmd;
-
-			draw->mm_time = qdev->rom->mm_clock;
-		}
-		qxl_bo_kunmap_atomic_page(qdev, cmd_bo, fb_cmd);
-		if (unwritten) {
-			DRM_ERROR("got unwritten %d\n", unwritten);
-			qxl_release_unreserve(qdev, release);
-			qxl_release_free(qdev, release);
-			return -EFAULT;
-		}
-
-		for (i = 0 ; i < user_cmd.relocs_num; ++i) {
-			if (DRM_COPY_FROM_USER(&reloc,
-					       &((struct drm_qxl_reloc *)(uintptr_t)user_cmd.relocs)[i],
-					       sizeof(reloc))) {
-				qxl_bo_list_unreserve(&reloc_list, true);
-				qxl_release_unreserve(qdev, release);
-				qxl_release_free(qdev, release);
-				return -EFAULT;
-			}
-
-			/* add the bos to the list of bos to validate -
-			   need to validate first then process relocs? */
-			if (reloc.dst_handle) {
-				reloc_dst_bo = qxlhw_handle_to_bo(qdev, file_priv,
-								  reloc.dst_handle, &reloc_list);
-				if (!reloc_dst_bo) {
-					qxl_bo_list_unreserve(&reloc_list, true);
-					qxl_release_unreserve(qdev, release);
-					qxl_release_free(qdev, release);
-					return -EINVAL;
-				}
-				reloc_dst_offset = 0;
-			} else {
-				reloc_dst_bo = cmd_bo;
-				reloc_dst_offset = release->release_offset;
-			}
-
-			/* reserve and validate the reloc dst bo */
-			if (reloc.reloc_type == QXL_RELOC_TYPE_BO || reloc.src_handle > 0) {
-				reloc_src_bo =
-					qxlhw_handle_to_bo(qdev, file_priv,
-							   reloc.src_handle, &reloc_list);
-				if (!reloc_src_bo) {
-					if (reloc_dst_bo != cmd_bo)
-						drm_gem_object_unreference_unlocked(&reloc_dst_bo->gem_base);
-					qxl_bo_list_unreserve(&reloc_list, true);
-					qxl_release_unreserve(qdev, release);
-					qxl_release_free(qdev, release);
-					return -EINVAL;
-				}
-			} else
-				reloc_src_bo = NULL;
-			if (reloc.reloc_type == QXL_RELOC_TYPE_BO) {
-				apply_reloc(qdev, reloc_dst_bo, reloc_dst_offset + reloc.dst_offset,
-					    reloc_src_bo, reloc.src_offset);
-			} else if (reloc.reloc_type == QXL_RELOC_TYPE_SURF) {
-				apply_surf_reloc(qdev, reloc_dst_bo, reloc_dst_offset + reloc.dst_offset, reloc_src_bo);
-			} else {
-				DRM_ERROR("unknown reloc type %d\n", reloc.reloc_type);
-				return -EINVAL;
-			}
-
-			if (reloc_src_bo && reloc_src_bo != cmd_bo) {
-				qxl_release_add_res(qdev, release, reloc_src_bo);
-				drm_gem_object_unreference_unlocked(&reloc_src_bo->gem_base);
-			}
-
-			if (reloc_dst_bo != cmd_bo)
-				drm_gem_object_unreference_unlocked(&reloc_dst_bo->gem_base);
-		}
-		qxl_fence_releaseable(qdev, release);
-
-		ret = qxl_push_command_ring_release(qdev, release, user_cmd.type, true);
-		if (ret == -ERESTARTSYS) {
-			qxl_release_unreserve(qdev, release);
-			qxl_release_free(qdev, release);
-			qxl_bo_list_unreserve(&reloc_list, true);
-			return ret;
-		}
-		qxl_release_unreserve(qdev, release);
 	}
-	qxl_bo_list_unreserve(&reloc_list, 0);
 	return 0;
 }
 
@@ -305,7 +336,7 @@
 		goto out;
 
 	if (!qobj->pin_count) {
-		qxl_ttm_placement_from_domain(qobj, qobj->type);
+		qxl_ttm_placement_from_domain(qobj, qobj->type, false);
 		ret = ttm_bo_validate(&qobj->tbo, &qobj->placement,
 				      true, false);
 		if (unlikely(ret))
@@ -402,7 +433,7 @@
 	return ret;
 }
 
-struct drm_ioctl_desc qxl_ioctls[] = {
+const struct drm_ioctl_desc qxl_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(QXL_ALLOC, qxl_alloc_ioctl, DRM_AUTH|DRM_UNLOCKED),
 
 	DRM_IOCTL_DEF_DRV(QXL_MAP, qxl_map_ioctl, DRM_AUTH|DRM_UNLOCKED),
diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c
index 1191fe7..8691c76 100644
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@@ -51,20 +51,21 @@
 	return false;
 }
 
-void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain)
+void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned)
 {
 	u32 c = 0;
+	u32 pflag = pinned ? TTM_PL_FLAG_NO_EVICT : 0;
 
 	qbo->placement.fpfn = 0;
 	qbo->placement.lpfn = 0;
 	qbo->placement.placement = qbo->placements;
 	qbo->placement.busy_placement = qbo->placements;
 	if (domain == QXL_GEM_DOMAIN_VRAM)
-		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM;
+		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag;
 	if (domain == QXL_GEM_DOMAIN_SURFACE)
-		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0;
+		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag;
 	if (domain == QXL_GEM_DOMAIN_CPU)
-		qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag;
 	if (!c)
 		qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	qbo->placement.num_placement = c;
@@ -73,7 +74,7 @@
 
 
 int qxl_bo_create(struct qxl_device *qdev,
-		  unsigned long size, bool kernel, u32 domain,
+		  unsigned long size, bool kernel, bool pinned, u32 domain,
 		  struct qxl_surface *surf,
 		  struct qxl_bo **bo_ptr)
 {
@@ -97,17 +98,16 @@
 		kfree(bo);
 		return r;
 	}
-	bo->gem_base.driver_private = NULL;
 	bo->type = domain;
-	bo->pin_count = 0;
+	bo->pin_count = pinned ? 1 : 0;
 	bo->surface_id = 0;
 	qxl_fence_init(qdev, &bo->fence);
 	INIT_LIST_HEAD(&bo->list);
-	atomic_set(&bo->reserve_count, 0);
+
 	if (surf)
 		bo->surf = *surf;
 
-	qxl_ttm_placement_from_domain(bo, domain);
+	qxl_ttm_placement_from_domain(bo, domain, pinned);
 
 	r = ttm_bo_init(&qdev->mman.bdev, &bo->tbo, size, type,
 			&bo->placement, 0, !kernel, NULL, size,
@@ -228,7 +228,7 @@
 int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr)
 {
 	struct qxl_device *qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
-	int r, i;
+	int r;
 
 	if (bo->pin_count) {
 		bo->pin_count++;
@@ -236,9 +236,7 @@
 			*gpu_addr = qxl_bo_gpu_offset(bo);
 		return 0;
 	}
-	qxl_ttm_placement_from_domain(bo, domain);
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+	qxl_ttm_placement_from_domain(bo, domain, true);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		bo->pin_count = 1;
@@ -317,53 +315,6 @@
 	return 0;
 }
 
-void qxl_bo_list_unreserve(struct qxl_reloc_list *reloc_list, bool failed)
-{
-	struct qxl_bo_list *entry, *sf;
-
-	list_for_each_entry_safe(entry, sf, &reloc_list->bos, lhead) {
-		qxl_bo_unreserve(entry->bo);
-		list_del(&entry->lhead);
-		kfree(entry);
-	}
-}
-
-int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo)
-{
-	struct qxl_bo_list *entry;
-	int ret;
-
-	list_for_each_entry(entry, &reloc_list->bos, lhead) {
-		if (entry->bo == bo)
-			return 0;
-	}
-
-	entry = kmalloc(sizeof(struct qxl_bo_list), GFP_KERNEL);
-	if (!entry)
-		return -ENOMEM;
-
-	entry->bo = bo;
-	list_add(&entry->lhead, &reloc_list->bos);
-
-	ret = qxl_bo_reserve(bo, false);
-	if (ret)
-		return ret;
-
-	if (!bo->pin_count) {
-		qxl_ttm_placement_from_domain(bo, bo->type);
-		ret = ttm_bo_validate(&bo->tbo, &bo->placement,
-				      true, false);
-		if (ret)
-			return ret;
-	}
-
-	/* allocate a surface for reserved + validated buffers */
-	ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo);
-	if (ret)
-		return ret;
-	return 0;
-}
-
 int qxl_surf_evict(struct qxl_device *qdev)
 {
 	return ttm_bo_evict_mm(&qdev->mman.bdev, TTM_PL_PRIV0);
diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h
index ee7ad79..d458a14 100644
--- a/drivers/gpu/drm/qxl/qxl_object.h
+++ b/drivers/gpu/drm/qxl/qxl_object.h
@@ -59,7 +59,7 @@
 
 static inline u64 qxl_bo_mmap_offset(struct qxl_bo *bo)
 {
-	return bo->tbo.addr_space_offset;
+	return drm_vma_node_offset_addr(&bo->tbo.vma_node);
 }
 
 static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type,
@@ -88,7 +88,7 @@
 
 extern int qxl_bo_create(struct qxl_device *qdev,
 			 unsigned long size,
-			 bool kernel, u32 domain,
+			 bool kernel, bool pinned, u32 domain,
 			 struct qxl_surface *surf,
 			 struct qxl_bo **bo_ptr);
 extern int qxl_bo_kmap(struct qxl_bo *bo, void **ptr);
@@ -99,9 +99,7 @@
 extern void qxl_bo_unref(struct qxl_bo **bo);
 extern int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr);
 extern int qxl_bo_unpin(struct qxl_bo *bo);
-extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain);
+extern void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned);
 extern bool qxl_ttm_bo_is_qxl_bo(struct ttm_buffer_object *bo);
 
-extern int qxl_bo_list_add(struct qxl_reloc_list *reloc_list, struct qxl_bo *bo);
-extern void qxl_bo_list_unreserve(struct qxl_reloc_list *reloc_list, bool failed);
 #endif
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index b443d67..0109a96 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -38,7 +38,8 @@
 
 static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE };
 static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO };
-uint64_t
+
+static uint64_t
 qxl_release_alloc(struct qxl_device *qdev, int type,
 		  struct qxl_release **ret)
 {
@@ -53,9 +54,9 @@
 		return 0;
 	}
 	release->type = type;
-	release->bo_count = 0;
 	release->release_offset = 0;
 	release->surface_release_id = 0;
+	INIT_LIST_HEAD(&release->bos);
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&qdev->release_idr_lock);
@@ -77,20 +78,20 @@
 qxl_release_free(struct qxl_device *qdev,
 		 struct qxl_release *release)
 {
-	int i;
-
-	QXL_INFO(qdev, "release %d, type %d, %d bos\n", release->id,
-		 release->type, release->bo_count);
+	struct qxl_bo_list *entry, *tmp;
+	QXL_INFO(qdev, "release %d, type %d\n", release->id,
+		 release->type);
 
 	if (release->surface_release_id)
 		qxl_surface_id_dealloc(qdev, release->surface_release_id);
 
-	for (i = 0 ; i < release->bo_count; ++i) {
+	list_for_each_entry_safe(entry, tmp, &release->bos, tv.head) {
+		struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
 		QXL_INFO(qdev, "release %llx\n",
-			release->bos[i]->tbo.addr_space_offset
+			drm_vma_node_offset_addr(&entry->tv.bo->vma_node)
 						- DRM_FILE_OFFSET);
-		qxl_fence_remove_release(&release->bos[i]->fence, release->id);
-		qxl_bo_unref(&release->bos[i]);
+		qxl_fence_remove_release(&bo->fence, release->id);
+		qxl_bo_unref(&bo);
 	}
 	spin_lock(&qdev->release_idr_lock);
 	idr_remove(&qdev->release_idr, release->id);
@@ -98,83 +99,117 @@
 	kfree(release);
 }
 
-void
-qxl_release_add_res(struct qxl_device *qdev, struct qxl_release *release,
-		    struct qxl_bo *bo)
-{
-	int i;
-	for (i = 0; i < release->bo_count; i++)
-		if (release->bos[i] == bo)
-			return;
-
-	if (release->bo_count >= QXL_MAX_RES) {
-		DRM_ERROR("exceeded max resource on a qxl_release item\n");
-		return;
-	}
-	release->bos[release->bo_count++] = qxl_bo_ref(bo);
-}
-
 static int qxl_release_bo_alloc(struct qxl_device *qdev,
 				struct qxl_bo **bo)
 {
 	int ret;
-	ret = qxl_bo_create(qdev, PAGE_SIZE, false, QXL_GEM_DOMAIN_VRAM, NULL,
+	/* pin releases bo's they are too messy to evict */
+	ret = qxl_bo_create(qdev, PAGE_SIZE, false, true,
+			    QXL_GEM_DOMAIN_VRAM, NULL,
 			    bo);
 	return ret;
 }
 
-int qxl_release_reserve(struct qxl_device *qdev,
-			struct qxl_release *release, bool no_wait)
+int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo)
+{
+	struct qxl_bo_list *entry;
+
+	list_for_each_entry(entry, &release->bos, tv.head) {
+		if (entry->tv.bo == &bo->tbo)
+			return 0;
+	}
+
+	entry = kmalloc(sizeof(struct qxl_bo_list), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	qxl_bo_ref(bo);
+	entry->tv.bo = &bo->tbo;
+	list_add_tail(&entry->tv.head, &release->bos);
+	return 0;
+}
+
+static int qxl_release_validate_bo(struct qxl_bo *bo)
 {
 	int ret;
-	if (atomic_inc_return(&release->bos[0]->reserve_count) == 1) {
-		ret = qxl_bo_reserve(release->bos[0], no_wait);
+
+	if (!bo->pin_count) {
+		qxl_ttm_placement_from_domain(bo, bo->type, false);
+		ret = ttm_bo_validate(&bo->tbo, &bo->placement,
+				      true, false);
 		if (ret)
 			return ret;
 	}
+
+	/* allocate a surface for reserved + validated buffers */
+	ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+int qxl_release_reserve_list(struct qxl_release *release, bool no_intr)
+{
+	int ret;
+	struct qxl_bo_list *entry;
+
+	/* if only one object on the release its the release itself
+	   since these objects are pinned no need to reserve */
+	if (list_is_singular(&release->bos))
+		return 0;
+
+	ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(entry, &release->bos, tv.head) {
+		struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
+
+		ret = qxl_release_validate_bo(bo);
+		if (ret) {
+			ttm_eu_backoff_reservation(&release->ticket, &release->bos);
+			return ret;
+		}
+	}
 	return 0;
 }
 
-void qxl_release_unreserve(struct qxl_device *qdev,
-			  struct qxl_release *release)
+void qxl_release_backoff_reserve_list(struct qxl_release *release)
 {
-	if (atomic_dec_and_test(&release->bos[0]->reserve_count))
-		qxl_bo_unreserve(release->bos[0]);
+	/* if only one object on the release its the release itself
+	   since these objects are pinned no need to reserve */
+	if (list_is_singular(&release->bos))
+		return;
+
+	ttm_eu_backoff_reservation(&release->ticket, &release->bos);
 }
 
+
 int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
 				       enum qxl_surface_cmd_type surface_cmd_type,
 				       struct qxl_release *create_rel,
 				       struct qxl_release **release)
 {
-	int ret;
-
 	if (surface_cmd_type == QXL_SURFACE_CMD_DESTROY && create_rel) {
 		int idr_ret;
+		struct qxl_bo_list *entry = list_first_entry(&create_rel->bos, struct qxl_bo_list, tv.head);
 		struct qxl_bo *bo;
 		union qxl_release_info *info;
 
 		/* stash the release after the create command */
 		idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release);
-		bo = qxl_bo_ref(create_rel->bos[0]);
+		bo = qxl_bo_ref(to_qxl_bo(entry->tv.bo));
 
 		(*release)->release_offset = create_rel->release_offset + 64;
 
-		qxl_release_add_res(qdev, *release, bo);
+		qxl_release_list_add(*release, bo);
 
-		ret = qxl_release_reserve(qdev, *release, false);
-		if (ret) {
-			DRM_ERROR("release reserve failed\n");
-			goto out_unref;
-		}
 		info = qxl_release_map(qdev, *release);
 		info->id = idr_ret;
 		qxl_release_unmap(qdev, *release, info);
 
-
-out_unref:
 		qxl_bo_unref(&bo);
-		return ret;
+		return 0;
 	}
 
 	return qxl_alloc_release_reserved(qdev, sizeof(struct qxl_surface_cmd),
@@ -187,7 +222,7 @@
 {
 	struct qxl_bo *bo;
 	int idr_ret;
-	int ret;
+	int ret = 0;
 	union qxl_release_info *info;
 	int cur_idx;
 
@@ -216,11 +251,6 @@
 			mutex_unlock(&qdev->release_mutex);
 			return ret;
 		}
-
-		/* pin releases bo's they are too messy to evict */
-		ret = qxl_bo_reserve(qdev->current_release_bo[cur_idx], false);
-		qxl_bo_pin(qdev->current_release_bo[cur_idx], QXL_GEM_DOMAIN_VRAM, NULL);
-		qxl_bo_unreserve(qdev->current_release_bo[cur_idx]);
 	}
 
 	bo = qxl_bo_ref(qdev->current_release_bo[cur_idx]);
@@ -231,36 +261,18 @@
 	if (rbo)
 		*rbo = bo;
 
-	qxl_release_add_res(qdev, *release, bo);
-
-	ret = qxl_release_reserve(qdev, *release, false);
 	mutex_unlock(&qdev->release_mutex);
-	if (ret)
-		goto out_unref;
+
+	qxl_release_list_add(*release, bo);
 
 	info = qxl_release_map(qdev, *release);
 	info->id = idr_ret;
 	qxl_release_unmap(qdev, *release, info);
 
-out_unref:
 	qxl_bo_unref(&bo);
 	return ret;
 }
 
-int qxl_fence_releaseable(struct qxl_device *qdev,
-			  struct qxl_release *release)
-{
-	int i, ret;
-	for (i = 0; i < release->bo_count; i++) {
-		if (!release->bos[i]->tbo.sync_obj)
-			release->bos[i]->tbo.sync_obj = &release->bos[i]->fence;
-		ret = qxl_fence_add_release(&release->bos[i]->fence, release->id);
-		if (ret)
-			return ret;
-	}
-	return 0;
-}
-
 struct qxl_release *qxl_release_from_id_locked(struct qxl_device *qdev,
 						   uint64_t id)
 {
@@ -273,10 +285,7 @@
 		DRM_ERROR("failed to find id in release_idr\n");
 		return NULL;
 	}
-	if (release->bo_count < 1) {
-		DRM_ERROR("read a released resource with 0 bos\n");
-		return NULL;
-	}
+
 	return release;
 }
 
@@ -285,9 +294,12 @@
 {
 	void *ptr;
 	union qxl_release_info *info;
-	struct qxl_bo *bo = release->bos[0];
+	struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
+	struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
 
 	ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_SIZE);
+	if (!ptr)
+		return NULL;
 	info = ptr + (release->release_offset & ~PAGE_SIZE);
 	return info;
 }
@@ -296,9 +308,51 @@
 		       struct qxl_release *release,
 		       union qxl_release_info *info)
 {
-	struct qxl_bo *bo = release->bos[0];
+	struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head);
+	struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
 	void *ptr;
 
 	ptr = ((void *)info) - (release->release_offset & ~PAGE_SIZE);
 	qxl_bo_kunmap_atomic_page(qdev, bo, ptr);
 }
+
+void qxl_release_fence_buffer_objects(struct qxl_release *release)
+{
+	struct ttm_validate_buffer *entry;
+	struct ttm_buffer_object *bo;
+	struct ttm_bo_global *glob;
+	struct ttm_bo_device *bdev;
+	struct ttm_bo_driver *driver;
+	struct qxl_bo *qbo;
+
+	/* if only one object on the release its the release itself
+	   since these objects are pinned no need to reserve */
+	if (list_is_singular(&release->bos))
+		return;
+
+	bo = list_first_entry(&release->bos, struct ttm_validate_buffer, head)->bo;
+	bdev = bo->bdev;
+	driver = bdev->driver;
+	glob = bo->glob;
+
+	spin_lock(&glob->lru_lock);
+	spin_lock(&bdev->fence_lock);
+
+	list_for_each_entry(entry, &release->bos, head) {
+		bo = entry->bo;
+		qbo = to_qxl_bo(bo);
+
+		if (!entry->bo->sync_obj)
+			entry->bo->sync_obj = &qbo->fence;
+
+		qxl_fence_add_release_locked(&qbo->fence, release->id);
+
+		ttm_bo_add_to_lru(bo);
+		ww_mutex_unlock(&bo->resv->lock);
+		entry->reserved = false;
+	}
+	spin_unlock(&bdev->fence_lock);
+	spin_unlock(&glob->lru_lock);
+	ww_acquire_fini(&release->ticket);
+}
+
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 489cb8c..037786d 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -206,13 +206,15 @@
 		return;
 	}
 	qbo = container_of(bo, struct qxl_bo, tbo);
-	qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU);
+	qxl_ttm_placement_from_domain(qbo, QXL_GEM_DOMAIN_CPU, false);
 	*placement = qbo->placement;
 }
 
 static int qxl_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	return 0;
+	struct qxl_bo *qbo = to_qxl_bo(bo);
+
+	return drm_vma_node_verify_access(&qbo->gem_base.vma_node, filp);
 }
 
 static int qxl_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c
index d4660cf..c451257 100644
--- a/drivers/gpu/drm/r128/r128_cce.c
+++ b/drivers/gpu/drm/r128/r128_cce.c
@@ -540,7 +540,7 @@
 	dev_priv->ring.end = ((u32 *) dev_priv->cce_ring->handle
 			      + init->ring_size / sizeof(u32));
 	dev_priv->ring.size = init->ring_size;
-	dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);
+	dev_priv->ring.size_l2qw = order_base_2(init->ring_size / 8);
 
 	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
 
diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c
index 472c38f..5bd307c 100644
--- a/drivers/gpu/drm/r128/r128_drv.c
+++ b/drivers/gpu/drm/r128/r128_drv.c
@@ -48,7 +48,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = r128_compat_ioctl,
 #endif
@@ -57,7 +56,7 @@
 
 static struct drm_driver driver = {
 	.driver_features =
-	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG |
+	    DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_SG |
 	    DRIVER_HAVE_DMA | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED,
 	.dev_priv_size = sizeof(drm_r128_buf_priv_t),
 	.load = r128_driver_load,
diff --git a/drivers/gpu/drm/r128/r128_drv.h b/drivers/gpu/drm/r128/r128_drv.h
index 930c71b..56eb5e3 100644
--- a/drivers/gpu/drm/r128/r128_drv.h
+++ b/drivers/gpu/drm/r128/r128_drv.h
@@ -131,7 +131,7 @@
 	drm_r128_freelist_t *list_entry;
 } drm_r128_buf_priv_t;
 
-extern struct drm_ioctl_desc r128_ioctls[];
+extern const struct drm_ioctl_desc r128_ioctls[];
 extern int r128_max_ioctl;
 
 				/* r128_cce.c */
diff --git a/drivers/gpu/drm/r128/r128_state.c b/drivers/gpu/drm/r128/r128_state.c
index 19bb7e6..01dd9ae 100644
--- a/drivers/gpu/drm/r128/r128_state.c
+++ b/drivers/gpu/drm/r128/r128_state.c
@@ -1643,7 +1643,7 @@
 	r128_do_cleanup_cce(dev);
 }
 
-struct drm_ioctl_desc r128_ioctls[] = {
+const struct drm_ioctl_desc r128_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(R128_INIT, r128_cce_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(R128_CCE_START, r128_cce_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(R128_CCE_STOP, r128_cce_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index c3df52c..306364a 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -72,14 +72,32 @@
 	radeon_cs.o radeon_bios.o radeon_benchmark.o r100.o r300.o r420.o \
 	rs400.o rs600.o rs690.o rv515.o r520.o r600.o rv770.o radeon_test.o \
 	r200.o radeon_legacy_tv.o r600_cs.o r600_blit_shaders.o \
-	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
-	evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
+	radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
+	evergreen.o evergreen_cs.o evergreen_blit_shaders.o \
 	evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
 	atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \
 	si_blit_shaders.o radeon_prime.o radeon_uvd.o cik.o cik_blit_shaders.o \
 	r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
-	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o
+	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
+	ci_dpm.o dce6_afmt.o
+
+# add async DMA block
+radeon-y += \
+	r600_dma.o \
+	rv770_dma.o \
+	evergreen_dma.o \
+	ni_dma.o \
+	si_dma.o \
+	cik_sdma.o \
+
+# add UVD block
+radeon-y += \
+	radeon_uvd.o \
+	uvd_v1_0.o \
+	uvd_v2_2.o \
+	uvd_v3_1.o \
+	uvd_v4_2.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index fb441a7..15da7ef 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -1222,12 +1222,17 @@
 	int r;
 
 	mutex_lock(&ctx->mutex);
+	/* reset data block */
+	ctx->data_block = 0;
 	/* reset reg block */
 	ctx->reg_block = 0;
 	/* reset fb window */
 	ctx->fb_base = 0;
 	/* reset io mode */
 	ctx->io_mode = ATOM_IO_MM;
+	/* reset divmul */
+	ctx->divmul[0] = 0;
+	ctx->divmul[1] = 0;
 	r = atom_execute_table_locked(ctx, index, params);
 	mutex_unlock(&ctx->mutex);
 	return r;
diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
index 16b120c..af10f85 100644
--- a/drivers/gpu/drm/radeon/atombios.h
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -7661,618 +7661,6 @@
   ATOM_POWERMODE_INFO_V3 asPowerPlayInfo[ATOM_MAX_NUMBEROF_POWER_BLOCK];
 }ATOM_POWERPLAY_INFO_V3;
 
-/* New PPlib */
-/**************************************************************************/
-typedef struct _ATOM_PPLIB_THERMALCONTROLLER
-
-{
-    UCHAR ucType;           // one of ATOM_PP_THERMALCONTROLLER_*
-    UCHAR ucI2cLine;        // as interpreted by DAL I2C
-    UCHAR ucI2cAddress;
-    UCHAR ucFanParameters;  // Fan Control Parameters.
-    UCHAR ucFanMinRPM;      // Fan Minimum RPM (hundreds) -- for display purposes only.
-    UCHAR ucFanMaxRPM;      // Fan Maximum RPM (hundreds) -- for display purposes only.
-    UCHAR ucReserved;       // ----
-    UCHAR ucFlags;          // to be defined
-} ATOM_PPLIB_THERMALCONTROLLER;
-
-#define ATOM_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK 0x0f
-#define ATOM_PP_FANPARAMETERS_NOFAN                                 0x80    // No fan is connected to this controller.
-
-#define ATOM_PP_THERMALCONTROLLER_NONE      0
-#define ATOM_PP_THERMALCONTROLLER_LM63      1  // Not used by PPLib
-#define ATOM_PP_THERMALCONTROLLER_ADM1032   2  // Not used by PPLib
-#define ATOM_PP_THERMALCONTROLLER_ADM1030   3  // Not used by PPLib
-#define ATOM_PP_THERMALCONTROLLER_MUA6649   4  // Not used by PPLib
-#define ATOM_PP_THERMALCONTROLLER_LM64      5
-#define ATOM_PP_THERMALCONTROLLER_F75375    6  // Not used by PPLib
-#define ATOM_PP_THERMALCONTROLLER_RV6xx     7
-#define ATOM_PP_THERMALCONTROLLER_RV770     8
-#define ATOM_PP_THERMALCONTROLLER_ADT7473   9
-#define ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO     11
-#define ATOM_PP_THERMALCONTROLLER_EVERGREEN 12
-#define ATOM_PP_THERMALCONTROLLER_EMC2103   13  /* 0x0D */ // Only fan control will be implemented, do NOT show this in PPGen.
-#define ATOM_PP_THERMALCONTROLLER_SUMO      14  /* 0x0E */ // Sumo type, used internally
-#define ATOM_PP_THERMALCONTROLLER_NISLANDS  15
-#define ATOM_PP_THERMALCONTROLLER_SISLANDS  16
-#define ATOM_PP_THERMALCONTROLLER_LM96163   17
-#define ATOM_PP_THERMALCONTROLLER_CISLANDS  18
-
-// Thermal controller 'combo type' to use an external controller for Fan control and an internal controller for thermal.
-// We probably should reserve the bit 0x80 for this use.
-// To keep the number of these types low we should also use the same code for all ASICs (i.e. do not distinguish RV6xx and RV7xx Internal here).
-// The driver can pick the correct internal controller based on the ASIC.
-
-#define ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL   0x89    // ADT7473 Fan Control + Internal Thermal Controller
-#define ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL   0x8D    // EMC2103 Fan Control + Internal Thermal Controller
-
-typedef struct _ATOM_PPLIB_STATE
-{
-    UCHAR ucNonClockStateIndex;
-    UCHAR ucClockStateIndices[1]; // variable-sized
-} ATOM_PPLIB_STATE;
-
-
-typedef struct _ATOM_PPLIB_FANTABLE
-{
-    UCHAR   ucFanTableFormat;                // Change this if the table format changes or version changes so that the other fields are not the same.
-    UCHAR   ucTHyst;                         // Temperature hysteresis. Integer.
-    USHORT  usTMin;                          // The temperature, in 0.01 centigrades, below which we just run at a minimal PWM.
-    USHORT  usTMed;                          // The middle temperature where we change slopes.
-    USHORT  usTHigh;                         // The high point above TMed for adjusting the second slope.
-    USHORT  usPWMMin;                        // The minimum PWM value in percent (0.01% increments).
-    USHORT  usPWMMed;                        // The PWM value (in percent) at TMed.
-    USHORT  usPWMHigh;                       // The PWM value at THigh.
-} ATOM_PPLIB_FANTABLE;
-
-typedef struct _ATOM_PPLIB_FANTABLE2
-{
-    ATOM_PPLIB_FANTABLE basicTable;
-    USHORT  usTMax;                          // The max temperature
-} ATOM_PPLIB_FANTABLE2;
-
-typedef struct _ATOM_PPLIB_EXTENDEDHEADER
-{
-    USHORT  usSize;
-    ULONG   ulMaxEngineClock;   // For Overdrive.
-    ULONG   ulMaxMemoryClock;   // For Overdrive.
-    // Add extra system parameters here, always adjust size to include all fields.
-    USHORT  usVCETableOffset; //points to ATOM_PPLIB_VCE_Table
-    USHORT  usUVDTableOffset;   //points to ATOM_PPLIB_UVD_Table
-    USHORT  usSAMUTableOffset;  //points to ATOM_PPLIB_SAMU_Table
-    USHORT  usPPMTableOffset;   //points to ATOM_PPLIB_PPM_Table
-} ATOM_PPLIB_EXTENDEDHEADER;
-
-//// ATOM_PPLIB_POWERPLAYTABLE::ulPlatformCaps
-#define ATOM_PP_PLATFORM_CAP_BACKBIAS 1
-#define ATOM_PP_PLATFORM_CAP_POWERPLAY 2
-#define ATOM_PP_PLATFORM_CAP_SBIOSPOWERSOURCE 4
-#define ATOM_PP_PLATFORM_CAP_ASPM_L0s 8
-#define ATOM_PP_PLATFORM_CAP_ASPM_L1 16
-#define ATOM_PP_PLATFORM_CAP_HARDWAREDC 32
-#define ATOM_PP_PLATFORM_CAP_GEMINIPRIMARY 64
-#define ATOM_PP_PLATFORM_CAP_STEPVDDC 128
-#define ATOM_PP_PLATFORM_CAP_VOLTAGECONTROL 256
-#define ATOM_PP_PLATFORM_CAP_SIDEPORTCONTROL 512
-#define ATOM_PP_PLATFORM_CAP_TURNOFFPLL_ASPML1 1024
-#define ATOM_PP_PLATFORM_CAP_HTLINKCONTROL 2048
-#define ATOM_PP_PLATFORM_CAP_MVDDCONTROL 4096
-#define ATOM_PP_PLATFORM_CAP_GOTO_BOOT_ON_ALERT 0x2000              // Go to boot state on alerts, e.g. on an AC->DC transition.
-#define ATOM_PP_PLATFORM_CAP_DONT_WAIT_FOR_VBLANK_ON_ALERT 0x4000   // Do NOT wait for VBLANK during an alert (e.g. AC->DC transition).
-#define ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL 0x8000                   // Does the driver control VDDCI independently from VDDC.
-#define ATOM_PP_PLATFORM_CAP_REGULATOR_HOT 0x00010000               // Enable the 'regulator hot' feature.
-#define ATOM_PP_PLATFORM_CAP_BACO          0x00020000               // Does the driver supports BACO state.
-#define ATOM_PP_PLATFORM_CAP_NEW_CAC_VOLTAGE   0x00040000           // Does the driver supports new CAC voltage table.
-#define ATOM_PP_PLATFORM_CAP_REVERT_GPIO5_POLARITY   0x00080000     // Does the driver supports revert GPIO5 polarity.
-#define ATOM_PP_PLATFORM_CAP_OUTPUT_THERMAL2GPIO17   0x00100000     // Does the driver supports thermal2GPIO17.
-#define ATOM_PP_PLATFORM_CAP_VRHOT_GPIO_CONFIGURABLE   0x00200000   // Does the driver supports VR HOT GPIO Configurable.
-
-typedef struct _ATOM_PPLIB_POWERPLAYTABLE
-{
-      ATOM_COMMON_TABLE_HEADER sHeader;
-
-      UCHAR ucDataRevision;
-
-      UCHAR ucNumStates;
-      UCHAR ucStateEntrySize;
-      UCHAR ucClockInfoSize;
-      UCHAR ucNonClockSize;
-
-      // offset from start of this table to array of ucNumStates ATOM_PPLIB_STATE structures
-      USHORT usStateArrayOffset;
-
-      // offset from start of this table to array of ASIC-specific structures,
-      // currently ATOM_PPLIB_CLOCK_INFO.
-      USHORT usClockInfoArrayOffset;
-
-      // offset from start of this table to array of ATOM_PPLIB_NONCLOCK_INFO
-      USHORT usNonClockInfoArrayOffset;
-
-      USHORT usBackbiasTime;    // in microseconds
-      USHORT usVoltageTime;     // in microseconds
-      USHORT usTableSize;       //the size of this structure, or the extended structure
-
-      ULONG ulPlatformCaps;            // See ATOM_PPLIB_CAPS_*
-
-      ATOM_PPLIB_THERMALCONTROLLER    sThermalController;
-
-      USHORT usBootClockInfoOffset;
-      USHORT usBootNonClockInfoOffset;
-
-} ATOM_PPLIB_POWERPLAYTABLE;
-
-typedef struct _ATOM_PPLIB_POWERPLAYTABLE2
-{
-    ATOM_PPLIB_POWERPLAYTABLE basicTable;
-    UCHAR   ucNumCustomThermalPolicy;
-    USHORT  usCustomThermalPolicyArrayOffset;
-}ATOM_PPLIB_POWERPLAYTABLE2, *LPATOM_PPLIB_POWERPLAYTABLE2;
-
-typedef struct _ATOM_PPLIB_POWERPLAYTABLE3
-{
-    ATOM_PPLIB_POWERPLAYTABLE2 basicTable2;
-    USHORT                     usFormatID;                      // To be used ONLY by PPGen.
-    USHORT                     usFanTableOffset;
-    USHORT                     usExtendendedHeaderOffset;
-} ATOM_PPLIB_POWERPLAYTABLE3, *LPATOM_PPLIB_POWERPLAYTABLE3;
-
-typedef struct _ATOM_PPLIB_POWERPLAYTABLE4
-{
-    ATOM_PPLIB_POWERPLAYTABLE3 basicTable3;
-    ULONG                      ulGoldenPPID;                    // PPGen use only     
-    ULONG                      ulGoldenRevision;                // PPGen use only
-    USHORT                     usVddcDependencyOnSCLKOffset;
-    USHORT                     usVddciDependencyOnMCLKOffset;
-    USHORT                     usVddcDependencyOnMCLKOffset;
-    USHORT                     usMaxClockVoltageOnDCOffset;
-    USHORT                     usVddcPhaseShedLimitsTableOffset;    // Points to ATOM_PPLIB_PhaseSheddingLimits_Table
-    USHORT                     usMvddDependencyOnMCLKOffset;  
-} ATOM_PPLIB_POWERPLAYTABLE4, *LPATOM_PPLIB_POWERPLAYTABLE4;
-
-typedef struct _ATOM_PPLIB_POWERPLAYTABLE5
-{
-    ATOM_PPLIB_POWERPLAYTABLE4 basicTable4;
-    ULONG                      ulTDPLimit;
-    ULONG                      ulNearTDPLimit;
-    ULONG                      ulSQRampingThreshold;
-    USHORT                     usCACLeakageTableOffset;         // Points to ATOM_PPLIB_CAC_Leakage_Table
-    ULONG                      ulCACLeakage;                    // The iLeakage for driver calculated CAC leakage table
-    USHORT                     usTDPODLimit;
-    USHORT                     usLoadLineSlope;                 // in milliOhms * 100
-} ATOM_PPLIB_POWERPLAYTABLE5, *LPATOM_PPLIB_POWERPLAYTABLE5;
-
-//// ATOM_PPLIB_NONCLOCK_INFO::usClassification
-#define ATOM_PPLIB_CLASSIFICATION_UI_MASK          0x0007
-#define ATOM_PPLIB_CLASSIFICATION_UI_SHIFT         0
-#define ATOM_PPLIB_CLASSIFICATION_UI_NONE          0
-#define ATOM_PPLIB_CLASSIFICATION_UI_BATTERY       1
-#define ATOM_PPLIB_CLASSIFICATION_UI_BALANCED      3
-#define ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE   5
-// 2, 4, 6, 7 are reserved
-
-#define ATOM_PPLIB_CLASSIFICATION_BOOT                   0x0008
-#define ATOM_PPLIB_CLASSIFICATION_THERMAL                0x0010
-#define ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE     0x0020
-#define ATOM_PPLIB_CLASSIFICATION_REST                   0x0040
-#define ATOM_PPLIB_CLASSIFICATION_FORCED                 0x0080
-#define ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE          0x0100
-#define ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE      0x0200
-#define ATOM_PPLIB_CLASSIFICATION_UVDSTATE               0x0400
-#define ATOM_PPLIB_CLASSIFICATION_3DLOW                  0x0800
-#define ATOM_PPLIB_CLASSIFICATION_ACPI                   0x1000
-#define ATOM_PPLIB_CLASSIFICATION_HD2STATE               0x2000
-#define ATOM_PPLIB_CLASSIFICATION_HDSTATE                0x4000
-#define ATOM_PPLIB_CLASSIFICATION_SDSTATE                0x8000
-
-//// ATOM_PPLIB_NONCLOCK_INFO::usClassification2
-#define ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2     0x0001
-#define ATOM_PPLIB_CLASSIFICATION2_ULV                      0x0002
-#define ATOM_PPLIB_CLASSIFICATION2_MVC                      0x0004   //Multi-View Codec (BD-3D)
-
-//// ATOM_PPLIB_NONCLOCK_INFO::ulCapsAndSettings
-#define ATOM_PPLIB_SINGLE_DISPLAY_ONLY           0x00000001
-#define ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK         0x00000002
-
-// 0 is 2.5Gb/s, 1 is 5Gb/s
-#define ATOM_PPLIB_PCIE_LINK_SPEED_MASK            0x00000004
-#define ATOM_PPLIB_PCIE_LINK_SPEED_SHIFT           2
-
-// lanes - 1: 1, 2, 4, 8, 12, 16 permitted by PCIE spec
-#define ATOM_PPLIB_PCIE_LINK_WIDTH_MASK            0x000000F8
-#define ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT           3
-
-// lookup into reduced refresh-rate table
-#define ATOM_PPLIB_LIMITED_REFRESHRATE_VALUE_MASK  0x00000F00
-#define ATOM_PPLIB_LIMITED_REFRESHRATE_VALUE_SHIFT 8
-
-#define ATOM_PPLIB_LIMITED_REFRESHRATE_UNLIMITED    0
-#define ATOM_PPLIB_LIMITED_REFRESHRATE_50HZ         1
-// 2-15 TBD as needed.
-
-#define ATOM_PPLIB_SOFTWARE_DISABLE_LOADBALANCING        0x00001000
-#define ATOM_PPLIB_SOFTWARE_ENABLE_SLEEP_FOR_TIMESTAMPS  0x00002000
-
-#define ATOM_PPLIB_DISALLOW_ON_DC                       0x00004000
-
-#define ATOM_PPLIB_ENABLE_VARIBRIGHT                     0x00008000
-
-//memory related flags
-#define ATOM_PPLIB_SWSTATE_MEMORY_DLL_OFF               0x000010000
-
-//M3 Arb    //2bits, current 3 sets of parameters in total
-#define ATOM_PPLIB_M3ARB_MASK                       0x00060000
-#define ATOM_PPLIB_M3ARB_SHIFT                      17
-
-#define ATOM_PPLIB_ENABLE_DRR                       0x00080000
-
-// remaining 16 bits are reserved
-typedef struct _ATOM_PPLIB_THERMAL_STATE
-{
-    UCHAR   ucMinTemperature;
-    UCHAR   ucMaxTemperature;
-    UCHAR   ucThermalAction;
-}ATOM_PPLIB_THERMAL_STATE, *LPATOM_PPLIB_THERMAL_STATE;
-
-// Contained in an array starting at the offset
-// in ATOM_PPLIB_POWERPLAYTABLE::usNonClockInfoArrayOffset.
-// referenced from ATOM_PPLIB_STATE_INFO::ucNonClockStateIndex
-#define ATOM_PPLIB_NONCLOCKINFO_VER1      12
-#define ATOM_PPLIB_NONCLOCKINFO_VER2      24
-typedef struct _ATOM_PPLIB_NONCLOCK_INFO
-{
-      USHORT usClassification;
-      UCHAR  ucMinTemperature;
-      UCHAR  ucMaxTemperature;
-      ULONG  ulCapsAndSettings;
-      UCHAR  ucRequiredPower;
-      USHORT usClassification2;
-      ULONG  ulVCLK;
-      ULONG  ulDCLK;
-      UCHAR  ucUnused[5];
-} ATOM_PPLIB_NONCLOCK_INFO;
-
-// Contained in an array starting at the offset
-// in ATOM_PPLIB_POWERPLAYTABLE::usClockInfoArrayOffset.
-// referenced from ATOM_PPLIB_STATE::ucClockStateIndices
-typedef struct _ATOM_PPLIB_R600_CLOCK_INFO
-{
-      USHORT usEngineClockLow;
-      UCHAR ucEngineClockHigh;
-
-      USHORT usMemoryClockLow;
-      UCHAR ucMemoryClockHigh;
-
-      USHORT usVDDC;
-      USHORT usUnused1;
-      USHORT usUnused2;
-
-      ULONG ulFlags; // ATOM_PPLIB_R600_FLAGS_*
-
-} ATOM_PPLIB_R600_CLOCK_INFO;
-
-// ulFlags in ATOM_PPLIB_R600_CLOCK_INFO
-#define ATOM_PPLIB_R600_FLAGS_PCIEGEN2          1
-#define ATOM_PPLIB_R600_FLAGS_UVDSAFE           2
-#define ATOM_PPLIB_R600_FLAGS_BACKBIASENABLE    4
-#define ATOM_PPLIB_R600_FLAGS_MEMORY_ODT_OFF    8
-#define ATOM_PPLIB_R600_FLAGS_MEMORY_DLL_OFF   16
-#define ATOM_PPLIB_R600_FLAGS_LOWPOWER         32   // On the RV770 use 'low power' setting (sequencer S0).
-
-typedef struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO
-{
-      USHORT usEngineClockLow;
-      UCHAR  ucEngineClockHigh;
-
-      USHORT usMemoryClockLow;
-      UCHAR  ucMemoryClockHigh;
-
-      USHORT usVDDC;
-      USHORT usVDDCI;
-      USHORT usUnused;
-
-      ULONG ulFlags; // ATOM_PPLIB_R600_FLAGS_*
-
-} ATOM_PPLIB_EVERGREEN_CLOCK_INFO;
-
-typedef struct _ATOM_PPLIB_SI_CLOCK_INFO
-{
-      USHORT usEngineClockLow;
-      UCHAR  ucEngineClockHigh;
-
-      USHORT usMemoryClockLow;
-      UCHAR  ucMemoryClockHigh;
-
-      USHORT usVDDC;
-      USHORT usVDDCI;
-      UCHAR  ucPCIEGen;
-      UCHAR  ucUnused1;
-
-      ULONG ulFlags; // ATOM_PPLIB_SI_FLAGS_*, no flag is necessary for now
-
-} ATOM_PPLIB_SI_CLOCK_INFO;
-
-typedef struct _ATOM_PPLIB_CI_CLOCK_INFO
-{
-      USHORT usEngineClockLow;
-      UCHAR  ucEngineClockHigh;
-
-      USHORT usMemoryClockLow;
-      UCHAR  ucMemoryClockHigh;
-      
-      UCHAR  ucPCIEGen;
-      USHORT usPCIELane;
-} ATOM_PPLIB_CI_CLOCK_INFO;
-
-typedef struct _ATOM_PPLIB_RS780_CLOCK_INFO
-
-{
-      USHORT usLowEngineClockLow;         // Low Engine clock in MHz (the same way as on the R600).
-      UCHAR  ucLowEngineClockHigh;
-      USHORT usHighEngineClockLow;        // High Engine clock in MHz.
-      UCHAR  ucHighEngineClockHigh;
-      USHORT usMemoryClockLow;            // For now one of the ATOM_PPLIB_RS780_SPMCLK_XXXX constants.
-      UCHAR  ucMemoryClockHigh;           // Currentyl unused.
-      UCHAR  ucPadding;                   // For proper alignment and size.
-      USHORT usVDDC;                      // For the 780, use: None, Low, High, Variable
-      UCHAR  ucMaxHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}
-      UCHAR  ucMinHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could be bigger as display BW requriement.
-      USHORT usHTLinkFreq;                // See definition ATOM_PPLIB_RS780_HTLINKFREQ_xxx or in MHz(>=200).
-      ULONG  ulFlags; 
-} ATOM_PPLIB_RS780_CLOCK_INFO;
-
-#define ATOM_PPLIB_RS780_VOLTAGE_NONE       0 
-#define ATOM_PPLIB_RS780_VOLTAGE_LOW        1 
-#define ATOM_PPLIB_RS780_VOLTAGE_HIGH       2 
-#define ATOM_PPLIB_RS780_VOLTAGE_VARIABLE   3 
-
-#define ATOM_PPLIB_RS780_SPMCLK_NONE        0   // We cannot change the side port memory clock, leave it as it is.
-#define ATOM_PPLIB_RS780_SPMCLK_LOW         1
-#define ATOM_PPLIB_RS780_SPMCLK_HIGH        2
-
-#define ATOM_PPLIB_RS780_HTLINKFREQ_NONE       0 
-#define ATOM_PPLIB_RS780_HTLINKFREQ_LOW        1 
-#define ATOM_PPLIB_RS780_HTLINKFREQ_HIGH       2 
-
-typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{
-      USHORT usEngineClockLow;  //clockfrequency & 0xFFFF. The unit is in 10khz
-      UCHAR  ucEngineClockHigh; //clockfrequency >> 16. 
-      UCHAR  vddcIndex;         //2-bit vddc index;
-      USHORT tdpLimit;
-      //please initalize to 0
-      USHORT rsv1;
-      //please initialize to 0s
-      ULONG rsv2[2];
-}ATOM_PPLIB_SUMO_CLOCK_INFO;
-
-
-
-typedef struct _ATOM_PPLIB_STATE_V2
-{
-      //number of valid dpm levels in this state; Driver uses it to calculate the whole 
-      //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR)
-      UCHAR ucNumDPMLevels;
-      
-      //a index to the array of nonClockInfos
-      UCHAR nonClockInfoIndex;
-      /**
-      * Driver will read the first ucNumDPMLevels in this array
-      */
-      UCHAR clockInfoIndex[1];
-} ATOM_PPLIB_STATE_V2;
-
-typedef struct _StateArray{
-    //how many states we have 
-    UCHAR ucNumEntries;
-    
-    ATOM_PPLIB_STATE_V2 states[1];
-}StateArray;
-
-
-typedef struct _ClockInfoArray{
-    //how many clock levels we have
-    UCHAR ucNumEntries;
-    
-    //sizeof(ATOM_PPLIB_CLOCK_INFO)
-    UCHAR ucEntrySize;
-    
-    UCHAR clockInfo[1];
-}ClockInfoArray;
-
-typedef struct _NonClockInfoArray{
-
-    //how many non-clock levels we have. normally should be same as number of states
-    UCHAR ucNumEntries;
-    //sizeof(ATOM_PPLIB_NONCLOCK_INFO)
-    UCHAR ucEntrySize;
-    
-    ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1];
-}NonClockInfoArray;
-
-typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
-{
-    USHORT usClockLow;
-    UCHAR  ucClockHigh;
-    USHORT usVoltage;
-}ATOM_PPLIB_Clock_Voltage_Dependency_Record;
-
-typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Table
-{
-    UCHAR ucNumEntries;                                                // Number of entries.
-    ATOM_PPLIB_Clock_Voltage_Dependency_Record entries[1];             // Dynamically allocate entries.
-}ATOM_PPLIB_Clock_Voltage_Dependency_Table;
-
-typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Record
-{
-    USHORT usSclkLow;
-    UCHAR  ucSclkHigh;
-    USHORT usMclkLow;
-    UCHAR  ucMclkHigh;
-    USHORT usVddc;
-    USHORT usVddci;
-}ATOM_PPLIB_Clock_Voltage_Limit_Record;
-
-typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table
-{
-    UCHAR ucNumEntries;                                                // Number of entries.
-    ATOM_PPLIB_Clock_Voltage_Limit_Record entries[1];                  // Dynamically allocate entries.
-}ATOM_PPLIB_Clock_Voltage_Limit_Table;
-
-typedef struct _ATOM_PPLIB_CAC_Leakage_Record
-{
-    USHORT usVddc;  // We use this field for the "fake" standardized VDDC for power calculations; For CI and newer, we use this as the real VDDC value.
-    ULONG  ulLeakageValue;  // For CI and newer we use this as the "fake" standar VDDC value.
-}ATOM_PPLIB_CAC_Leakage_Record;
-
-typedef struct _ATOM_PPLIB_CAC_Leakage_Table
-{
-    UCHAR ucNumEntries;                                                 // Number of entries.
-    ATOM_PPLIB_CAC_Leakage_Record entries[1];                           // Dynamically allocate entries.
-}ATOM_PPLIB_CAC_Leakage_Table;
-
-typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record
-{
-    USHORT usVoltage;
-    USHORT usSclkLow;
-    UCHAR  ucSclkHigh;
-    USHORT usMclkLow;
-    UCHAR  ucMclkHigh;
-}ATOM_PPLIB_PhaseSheddingLimits_Record;
-
-typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Table
-{
-    UCHAR ucNumEntries;                                                 // Number of entries.
-    ATOM_PPLIB_PhaseSheddingLimits_Record entries[1];                   // Dynamically allocate entries.
-}ATOM_PPLIB_PhaseSheddingLimits_Table;
-
-typedef struct _VCEClockInfo{
-    USHORT usEVClkLow;
-    UCHAR  ucEVClkHigh;
-    USHORT usECClkLow;
-    UCHAR  ucECClkHigh;
-}VCEClockInfo;
-
-typedef struct _VCEClockInfoArray{
-    UCHAR ucNumEntries;
-    VCEClockInfo entries[1];
-}VCEClockInfoArray;
-
-typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record
-{
-    USHORT usVoltage;
-    UCHAR  ucVCEClockInfoIndex;
-}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record;
-
-typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table
-{
-    UCHAR numEntries;
-    ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record entries[1];
-}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table;
-
-typedef struct _ATOM_PPLIB_VCE_State_Record
-{
-    UCHAR  ucVCEClockInfoIndex;
-    UCHAR  ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary
-}ATOM_PPLIB_VCE_State_Record;
-
-typedef struct _ATOM_PPLIB_VCE_State_Table
-{
-    UCHAR numEntries;
-    ATOM_PPLIB_VCE_State_Record entries[1];
-}ATOM_PPLIB_VCE_State_Table;
-
-
-typedef struct _ATOM_PPLIB_VCE_Table
-{
-      UCHAR revid;
-//    VCEClockInfoArray array;
-//    ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table limits;
-//    ATOM_PPLIB_VCE_State_Table states;
-}ATOM_PPLIB_VCE_Table;
-
-
-typedef struct _UVDClockInfo{
-    USHORT usVClkLow;
-    UCHAR  ucVClkHigh;
-    USHORT usDClkLow;
-    UCHAR  ucDClkHigh;
-}UVDClockInfo;
-
-typedef struct _UVDClockInfoArray{
-    UCHAR ucNumEntries;
-    UVDClockInfo entries[1];
-}UVDClockInfoArray;
-
-typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record
-{
-    USHORT usVoltage;
-    UCHAR  ucUVDClockInfoIndex;
-}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record;
-
-typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table
-{
-    UCHAR numEntries;
-    ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record entries[1];
-}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table;
-
-typedef struct _ATOM_PPLIB_UVD_State_Record
-{
-    UCHAR  ucUVDClockInfoIndex;
-    UCHAR  ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary
-}ATOM_PPLIB_UVD_State_Record;
-
-typedef struct _ATOM_PPLIB_UVD_State_Table
-{
-    UCHAR numEntries;
-    ATOM_PPLIB_UVD_State_Record entries[1];
-}ATOM_PPLIB_UVD_State_Table;
-
-
-typedef struct _ATOM_PPLIB_UVD_Table
-{
-      UCHAR revid;
-//    UVDClockInfoArray array;
-//    ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table limits;
-//    ATOM_PPLIB_UVD_State_Table states;
-}ATOM_PPLIB_UVD_Table;
-
-
-typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Record
-{
-      USHORT usVoltage;
-      USHORT usSAMClockLow;
-      UCHAR  ucSAMClockHigh;
-}ATOM_PPLIB_SAMClk_Voltage_Limit_Record;
-
-typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Table{
-    UCHAR numEntries;
-    ATOM_PPLIB_SAMClk_Voltage_Limit_Record entries[1];
-}ATOM_PPLIB_SAMClk_Voltage_Limit_Table;
-
-typedef struct _ATOM_PPLIB_SAMU_Table
-{
-      UCHAR revid;
-      ATOM_PPLIB_SAMClk_Voltage_Limit_Table limits;
-}ATOM_PPLIB_SAMU_Table;
-
-#define ATOM_PPM_A_A    1
-#define ATOM_PPM_A_I    2
-typedef struct _ATOM_PPLIB_PPM_Table
-{
-      UCHAR  ucRevId;
-      UCHAR  ucPpmDesign;          //A+I or A+A
-      USHORT usCpuCoreNumber;
-      ULONG  ulPlatformTDP;
-      ULONG  ulSmallACPlatformTDP;
-      ULONG  ulPlatformTDC;
-      ULONG  ulSmallACPlatformTDC;
-      ULONG  ulApuTDP;
-      ULONG  ulDGpuTDP;  
-      ULONG  ulDGpuUlvPower;
-      ULONG  ulTjmax;
-} ATOM_PPLIB_PPM_Table;
-
-/**************************************************************************/
-
 
 // Following definitions are for compatibility issue in different SW components. 
 #define ATOM_MASTER_DATA_TABLE_REVISION   0x01
@@ -8485,3 +7873,6 @@
 
 
 #endif /* _ATOMBIOS_H */
+
+#include "pptable.h"
+
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index b9d3b43..bf87f6d 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -1910,6 +1910,12 @@
 	int i;
 
 	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
+	/* disable the GRPH */
+	if (ASIC_IS_DCE4(rdev))
+		WREG32(EVERGREEN_GRPH_ENABLE + radeon_crtc->crtc_offset, 0);
+	else if (ASIC_IS_AVIVO(rdev))
+		WREG32(AVIVO_D1GRPH_ENABLE + radeon_crtc->crtc_offset, 0);
+
 	if (ASIC_IS_DCE6(rdev))
 		atombios_powergate_crtc(crtc, ATOM_ENABLE);
 
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 064023b..0088541 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -44,6 +44,41 @@
 };
 
 /***** radeon AUX functions *****/
+
+/* Atom needs data in little endian format
+ * so swap as appropriate when copying data to
+ * or from atom. Note that atom operates on
+ * dw units.
+ */
+void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le)
+{
+#ifdef __BIG_ENDIAN
+	u8 src_tmp[20], dst_tmp[20]; /* used for byteswapping */
+	u32 *dst32, *src32;
+	int i;
+
+	memcpy(src_tmp, src, num_bytes);
+	src32 = (u32 *)src_tmp;
+	dst32 = (u32 *)dst_tmp;
+	if (to_le) {
+		for (i = 0; i < ((num_bytes + 3) / 4); i++)
+			dst32[i] = cpu_to_le32(src32[i]);
+		memcpy(dst, dst_tmp, num_bytes);
+	} else {
+		u8 dws = num_bytes & ~3;
+		for (i = 0; i < ((num_bytes + 3) / 4); i++)
+			dst32[i] = le32_to_cpu(src32[i]);
+		memcpy(dst, dst_tmp, dws);
+		if (num_bytes % 4) {
+			for (i = 0; i < (num_bytes % 4); i++)
+				dst[dws+i] = dst_tmp[dws+i];
+		}
+	}
+#else
+	memcpy(dst, src, num_bytes);
+#endif
+}
+
 union aux_channel_transaction {
 	PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1;
 	PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2;
@@ -65,10 +100,10 @@
 
 	base = (unsigned char *)(rdev->mode_info.atom_context->scratch + 1);
 
-	memcpy(base, send, send_bytes);
+	radeon_atom_copy_swap(base, send, send_bytes, true);
 
-	args.v1.lpAuxRequest = 0 + 4;
-	args.v1.lpDataOut = 16 + 4;
+	args.v1.lpAuxRequest = cpu_to_le16((u16)(0 + 4));
+	args.v1.lpDataOut = cpu_to_le16((u16)(16 + 4));
 	args.v1.ucDataOutLen = 0;
 	args.v1.ucChannelID = chan->rec.i2c_id;
 	args.v1.ucDelay = delay / 10;
@@ -102,7 +137,7 @@
 		recv_bytes = recv_size;
 
 	if (recv && recv_size)
-		memcpy(recv, base + 16, recv_bytes);
+		radeon_atom_copy_swap(recv, base + 16, recv_bytes, false);
 
 	return recv_bytes;
 }
@@ -550,7 +585,7 @@
 		return false;
 	}
 
-	DRM_DEBUG_KMS("link status %*ph\n", 6, link_status);
+	DRM_DEBUG_KMS("link status %6ph\n", link_status);
 	return true;
 }
 
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 092275d..dfac796 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -682,8 +682,6 @@
 int
 atombios_get_encoder_mode(struct drm_encoder *encoder)
 {
-	struct drm_device *dev = encoder->dev;
-	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct drm_connector *connector;
 	struct radeon_connector *radeon_connector;
@@ -710,8 +708,7 @@
 	case DRM_MODE_CONNECTOR_DVII:
 	case DRM_MODE_CONNECTOR_HDMIB: /* HDMI-B is basically DL-DVI; analog works fine */
 		if (drm_detect_hdmi_monitor(radeon_connector->edid) &&
-		    radeon_audio &&
-		    !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */
+		    radeon_audio)
 			return ATOM_ENCODER_MODE_HDMI;
 		else if (radeon_connector->use_digital)
 			return ATOM_ENCODER_MODE_DVI;
@@ -722,8 +719,7 @@
 	case DRM_MODE_CONNECTOR_HDMIA:
 	default:
 		if (drm_detect_hdmi_monitor(radeon_connector->edid) &&
-		    radeon_audio &&
-		    !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */
+		    radeon_audio)
 			return ATOM_ENCODER_MODE_HDMI;
 		else
 			return ATOM_ENCODER_MODE_DVI;
@@ -737,8 +733,7 @@
 		    (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP))
 			return ATOM_ENCODER_MODE_DP;
 		else if (drm_detect_hdmi_monitor(radeon_connector->edid) &&
-			 radeon_audio &&
-			 !ASIC_IS_DCE6(rdev)) /* remove once we support DCE6 */
+			 radeon_audio)
 			return ATOM_ENCODER_MODE_HDMI;
 		else
 			return ATOM_ENCODER_MODE_DVI;
diff --git a/drivers/gpu/drm/radeon/atombios_i2c.c b/drivers/gpu/drm/radeon/atombios_i2c.c
index 082338d..deaf98c 100644
--- a/drivers/gpu/drm/radeon/atombios_i2c.c
+++ b/drivers/gpu/drm/radeon/atombios_i2c.c
@@ -27,10 +27,12 @@
 #include "radeon.h"
 #include "atom.h"
 
+extern void radeon_atom_copy_swap(u8 *dst, u8 *src, u8 num_bytes, bool to_le);
+
 #define TARGET_HW_I2C_CLOCK 50
 
 /* these are a limitation of ProcessI2cChannelTransaction not the hw */
-#define ATOM_MAX_HW_I2C_WRITE 2
+#define ATOM_MAX_HW_I2C_WRITE 3
 #define ATOM_MAX_HW_I2C_READ  255
 
 static int radeon_process_i2c_ch(struct radeon_i2c_chan *chan,
@@ -50,20 +52,24 @@
 
 	if (flags & HW_I2C_WRITE) {
 		if (num > ATOM_MAX_HW_I2C_WRITE) {
-			DRM_ERROR("hw i2c: tried to write too many bytes (%d vs 2)\n", num);
+			DRM_ERROR("hw i2c: tried to write too many bytes (%d vs 3)\n", num);
 			return -EINVAL;
 		}
-		memcpy(&out, buf, num);
+		args.ucRegIndex = buf[0];
+		if (num > 1)
+			memcpy(&out, &buf[1], num - 1);
 		args.lpI2CDataOut = cpu_to_le16(out);
 	} else {
 		if (num > ATOM_MAX_HW_I2C_READ) {
 			DRM_ERROR("hw i2c: tried to read too many bytes (%d vs 255)\n", num);
 			return -EINVAL;
 		}
+		args.ucRegIndex = 0;
+		args.lpI2CDataOut = 0;
 	}
 
+	args.ucFlag = flags;
 	args.ucI2CSpeed = TARGET_HW_I2C_CLOCK;
-	args.ucRegIndex = 0;
 	args.ucTransBytes = num;
 	args.ucSlaveAddr = slave_addr << 1;
 	args.ucLineNumber = chan->rec.i2c_id;
@@ -77,7 +83,7 @@
 	}
 
 	if (!(flags & HW_I2C_WRITE))
-		memcpy(buf, base, num);
+		radeon_atom_copy_swap(buf, base, num, false);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c
index 0bfd55e..084e694 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.c
+++ b/drivers/gpu/drm/radeon/btc_dpm.c
@@ -2548,9 +2548,6 @@
 {
 	struct rv7xx_power_info *pi;
 	struct evergreen_power_info *eg_pi;
-	int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
-	u16 data_offset, size;
-	u8 frev, crev;
 	struct atom_clock_dividers dividers;
 	int ret;
 
@@ -2633,16 +2630,7 @@
 	eg_pi->vddci_control =
 		radeon_atom_is_voltage_gpio(rdev, SET_VOLTAGE_TYPE_ASIC_VDDCI, 0);
 
-	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
-                                   &frev, &crev, &data_offset)) {
-		pi->sclk_ss = true;
-		pi->mclk_ss = true;
-		pi->dynamic_ss = true;
-	} else {
-		pi->sclk_ss = false;
-		pi->mclk_ss = false;
-		pi->dynamic_ss = true;
-	}
+	rv770_get_engine_memory_ss(rdev);
 
 	pi->asi = RV770_ASI_DFLT;
 	pi->pasi = CYPRESS_HASI_DFLT;
@@ -2659,8 +2647,7 @@
 
 	pi->dynamic_pcie_gen2 = true;
 
-	if (pi->gfx_clock_gating &&
-	    (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE))
+	if (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE)
 		pi->thermal_protection = true;
 	else
 		pi->thermal_protection = false;
@@ -2712,6 +2699,12 @@
 	else
 		rdev->pm.dpm.dyn_state.sclk_mclk_delta = 10000;
 
+	/* make sure dc limits are valid */
+	if ((rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) ||
+	    (rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0))
+		rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc =
+			rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/cayman_blit_shaders.c b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
index 19a0114..98d009e 100644
--- a/drivers/gpu/drm/radeon/cayman_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/cayman_blit_shaders.c
@@ -317,58 +317,4 @@
 	0x00000010, /*  */
 };
 
-const u32 cayman_vs[] =
-{
-	0x00000004,
-	0x80400400,
-	0x0000a03c,
-	0x95000688,
-	0x00004000,
-	0x15000688,
-	0x00000000,
-	0x88000000,
-	0x04000000,
-	0x67961001,
-#ifdef __BIG_ENDIAN
-	0x00020000,
-#else
-	0x00000000,
-#endif
-	0x00000000,
-	0x04000000,
-	0x67961000,
-#ifdef __BIG_ENDIAN
-	0x00020008,
-#else
-	0x00000008,
-#endif
-	0x00000000,
-};
-
-const u32 cayman_ps[] =
-{
-	0x00000004,
-	0xa00c0000,
-	0x00000008,
-	0x80400000,
-	0x00000000,
-	0x95000688,
-	0x00000000,
-	0x88000000,
-	0x00380400,
-	0x00146b10,
-	0x00380000,
-	0x20146b10,
-	0x00380400,
-	0x40146b00,
-	0x80380000,
-	0x60146b00,
-	0x00000010,
-	0x000d1000,
-	0xb0800000,
-	0x00000000,
-};
-
-const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
-const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
 const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
new file mode 100644
index 0000000..916630f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -0,0 +1,5239 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "radeon.h"
+#include "cikd.h"
+#include "r600_dpm.h"
+#include "ci_dpm.h"
+#include "atom.h"
+#include <linux/seq_file.h>
+
+#define MC_CG_ARB_FREQ_F0           0x0a
+#define MC_CG_ARB_FREQ_F1           0x0b
+#define MC_CG_ARB_FREQ_F2           0x0c
+#define MC_CG_ARB_FREQ_F3           0x0d
+
+#define SMC_RAM_END 0x40000
+
+#define VOLTAGE_SCALE               4
+#define VOLTAGE_VID_OFFSET_SCALE1    625
+#define VOLTAGE_VID_OFFSET_SCALE2    100
+
+static const struct ci_pt_defaults defaults_bonaire_xt =
+{
+	1, 0xF, 0xFD, 0x19, 5, 45, 0, 0xB0000,
+	{ 0x79,  0x253, 0x25D, 0xAE,  0x72,  0x80,  0x83,  0x86,  0x6F,  0xC8,  0xC9,  0xC9,  0x2F,  0x4D,  0x61  },
+	{ 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 }
+};
+
+static const struct ci_pt_defaults defaults_bonaire_pro =
+{
+	1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x65062,
+	{ 0x8C,  0x23F, 0x244, 0xA6,  0x83,  0x85,  0x86,  0x86,  0x83,  0xDB,  0xDB,  0xDA,  0x67,  0x60,  0x5F  },
+	{ 0x187, 0x193, 0x193, 0x1C7, 0x1D1, 0x1D1, 0x210, 0x219, 0x219, 0x266, 0x26C, 0x26C, 0x2C9, 0x2CB, 0x2CB }
+};
+
+static const struct ci_pt_defaults defaults_saturn_xt =
+{
+	1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x70000,
+	{ 0x8C,  0x247, 0x249, 0xA6,  0x80,  0x81,  0x8B,  0x89,  0x86,  0xC9,  0xCA,  0xC9,  0x4D,  0x4D,  0x4D  },
+	{ 0x187, 0x187, 0x187, 0x1C7, 0x1C7, 0x1C7, 0x210, 0x210, 0x210, 0x266, 0x266, 0x266, 0x2C9, 0x2C9, 0x2C9 }
+};
+
+static const struct ci_pt_defaults defaults_saturn_pro =
+{
+	1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x30000,
+	{ 0x96,  0x21D, 0x23B, 0xA1,  0x85,  0x87,  0x83,  0x84,  0x81,  0xE6,  0xE6,  0xE6,  0x71,  0x6A,  0x6A  },
+	{ 0x193, 0x19E, 0x19E, 0x1D2, 0x1DC, 0x1DC, 0x21A, 0x223, 0x223, 0x26E, 0x27E, 0x274, 0x2CF, 0x2D2, 0x2D2 }
+};
+
+static const struct ci_pt_config_reg didt_config_ci[] =
+{
+	{ 0x10, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x10, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x10, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x10, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x11, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x11, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x11, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x11, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x12, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x12, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x12, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x12, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x2, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x2, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x2, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x1, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x1, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x0, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x30, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x30, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x30, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x30, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x31, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x31, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x31, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x31, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x32, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x32, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x32, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x32, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x22, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x22, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x22, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x21, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x21, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x20, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x50, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x50, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x50, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x50, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x51, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x51, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x51, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x51, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x52, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x52, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x52, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x52, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x42, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x42, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x42, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x41, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x41, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x40, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x70, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x70, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x70, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x70, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x71, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x71, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x71, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x71, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x72, 0x000000ff, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x72, 0x0000ff00, 8, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x72, 0x00ff0000, 16, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x72, 0xff000000, 24, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x62, 0x00003fff, 0, 0x4, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x62, 0x03ff0000, 16, 0x80, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x62, 0x78000000, 27, 0x3, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x61, 0x0000ffff, 0, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x61, 0xffff0000, 16, 0x3FFF, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0x60, 0x00000001, 0, 0x0, CISLANDS_CONFIGREG_DIDT_IND },
+	{ 0xFFFFFFFF }
+};
+
+extern u8 rv770_get_memory_module_index(struct radeon_device *rdev);
+extern int ni_copy_and_switch_arb_sets(struct radeon_device *rdev,
+				       u32 arb_freq_src, u32 arb_freq_dest);
+extern u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock);
+extern u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode);
+extern void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev,
+						     u32 max_voltage_steps,
+						     struct atom_voltage_table *voltage_table);
+extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev);
+extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev);
+extern void cik_update_cg(struct radeon_device *rdev,
+			  u32 block, bool enable);
+
+static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev,
+					 struct atom_voltage_table_entry *voltage_table,
+					 u16 *std_voltage_hi_sidd, u16 *std_voltage_lo_sidd);
+static int ci_set_power_limit(struct radeon_device *rdev, u32 n);
+static int ci_set_overdrive_target_tdp(struct radeon_device *rdev,
+				       u32 target_tdp);
+static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate);
+
+static struct ci_power_info *ci_get_pi(struct radeon_device *rdev)
+{
+        struct ci_power_info *pi = rdev->pm.dpm.priv;
+
+        return pi;
+}
+
+static struct ci_ps *ci_get_ps(struct radeon_ps *rps)
+{
+	struct ci_ps *ps = rps->ps_priv;
+
+	return ps;
+}
+
+static void ci_initialize_powertune_defaults(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	switch (rdev->pdev->device) {
+        case 0x6650:
+        case 0x6658:
+        case 0x665C:
+        default:
+		pi->powertune_defaults = &defaults_bonaire_xt;
+		break;
+        case 0x6651:
+        case 0x665D:
+		pi->powertune_defaults = &defaults_bonaire_pro;
+		break;
+        case 0x6640:
+		pi->powertune_defaults = &defaults_saturn_xt;
+		break;
+        case 0x6641:
+		pi->powertune_defaults = &defaults_saturn_pro;
+		break;
+	}
+
+	pi->dte_tj_offset = 0;
+
+	pi->caps_power_containment = true;
+	pi->caps_cac = false;
+	pi->caps_sq_ramping = false;
+	pi->caps_db_ramping = false;
+	pi->caps_td_ramping = false;
+	pi->caps_tcp_ramping = false;
+
+	if (pi->caps_power_containment) {
+		pi->caps_cac = true;
+		pi->enable_bapm_feature = true;
+		pi->enable_tdc_limit_feature = true;
+		pi->enable_pkg_pwr_tracking_feature = true;
+	}
+}
+
+static u8 ci_convert_to_vid(u16 vddc)
+{
+	return (6200 - (vddc * VOLTAGE_SCALE)) / 25;
+}
+
+static int ci_populate_bapm_vddc_vid_sidd(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u8 *hi_vid = pi->smc_powertune_table.BapmVddCVidHiSidd;
+	u8 *lo_vid = pi->smc_powertune_table.BapmVddCVidLoSidd;
+	u8 *hi2_vid = pi->smc_powertune_table.BapmVddCVidHiSidd2;
+	u32 i;
+
+	if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries == NULL)
+		return -EINVAL;
+	if (rdev->pm.dpm.dyn_state.cac_leakage_table.count > 8)
+		return -EINVAL;
+	if (rdev->pm.dpm.dyn_state.cac_leakage_table.count !=
+	    rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count)
+		return -EINVAL;
+
+	for (i = 0; i < rdev->pm.dpm.dyn_state.cac_leakage_table.count; i++) {
+		if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) {
+			lo_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc1);
+			hi_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc2);
+			hi2_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc3);
+		} else {
+			lo_vid[i] = ci_convert_to_vid(rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc);
+			hi_vid[i] = ci_convert_to_vid((u16)rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].leakage);
+		}
+	}
+	return 0;
+}
+
+static int ci_populate_vddc_vid(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u8 *vid = pi->smc_powertune_table.VddCVid;
+	u32 i;
+
+	if (pi->vddc_voltage_table.count > 8)
+		return -EINVAL;
+
+	for (i = 0; i < pi->vddc_voltage_table.count; i++)
+		vid[i] = ci_convert_to_vid(pi->vddc_voltage_table.entries[i].value);
+
+	return 0;
+}
+
+static int ci_populate_svi_load_line(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults;
+
+	pi->smc_powertune_table.SviLoadLineEn = pt_defaults->svi_load_line_en;
+	pi->smc_powertune_table.SviLoadLineVddC = pt_defaults->svi_load_line_vddc;
+	pi->smc_powertune_table.SviLoadLineTrimVddC = 3;
+	pi->smc_powertune_table.SviLoadLineOffsetVddC = 0;
+
+	return 0;
+}
+
+static int ci_populate_tdc_limit(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults;
+	u16 tdc_limit;
+
+	tdc_limit = rdev->pm.dpm.dyn_state.cac_tdp_table->tdc * 256;
+	pi->smc_powertune_table.TDC_VDDC_PkgLimit = cpu_to_be16(tdc_limit);
+	pi->smc_powertune_table.TDC_VDDC_ThrottleReleaseLimitPerc =
+		pt_defaults->tdc_vddc_throttle_release_limit_perc;
+	pi->smc_powertune_table.TDC_MAWt = pt_defaults->tdc_mawt;
+
+	return 0;
+}
+
+static int ci_populate_dw8(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults;
+	int ret;
+
+	ret = ci_read_smc_sram_dword(rdev,
+				     SMU7_FIRMWARE_HEADER_LOCATION +
+				     offsetof(SMU7_Firmware_Header, PmFuseTable) +
+				     offsetof(SMU7_Discrete_PmFuses, TdcWaterfallCtl),
+				     (u32 *)&pi->smc_powertune_table.TdcWaterfallCtl,
+				     pi->sram_end);
+	if (ret)
+		return -EINVAL;
+	else
+		pi->smc_powertune_table.TdcWaterfallCtl = pt_defaults->tdc_waterfall_ctl;
+
+	return 0;
+}
+
+static int ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u8 *hi_vid = pi->smc_powertune_table.BapmVddCVidHiSidd;
+	u8 *lo_vid = pi->smc_powertune_table.BapmVddCVidLoSidd;
+	int i, min, max;
+
+	min = max = hi_vid[0];
+	for (i = 0; i < 8; i++) {
+		if (0 != hi_vid[i]) {
+			if (min > hi_vid[i])
+				min = hi_vid[i];
+			if (max < hi_vid[i])
+				max = hi_vid[i];
+		}
+
+		if (0 != lo_vid[i]) {
+			if (min > lo_vid[i])
+				min = lo_vid[i];
+			if (max < lo_vid[i])
+				max = lo_vid[i];
+		}
+	}
+
+	if ((min == 0) || (max == 0))
+		return -EINVAL;
+	pi->smc_powertune_table.GnbLPMLMaxVid = (u8)max;
+	pi->smc_powertune_table.GnbLPMLMinVid = (u8)min;
+
+	return 0;
+}
+
+static int ci_populate_bapm_vddc_base_leakage_sidd(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u16 hi_sidd = pi->smc_powertune_table.BapmVddCBaseLeakageHiSidd;
+	u16 lo_sidd = pi->smc_powertune_table.BapmVddCBaseLeakageLoSidd;
+	struct radeon_cac_tdp_table *cac_tdp_table =
+		rdev->pm.dpm.dyn_state.cac_tdp_table;
+
+	hi_sidd = cac_tdp_table->high_cac_leakage / 100 * 256;
+	lo_sidd = cac_tdp_table->low_cac_leakage / 100 * 256;
+
+	pi->smc_powertune_table.BapmVddCBaseLeakageHiSidd = cpu_to_be16(hi_sidd);
+	pi->smc_powertune_table.BapmVddCBaseLeakageLoSidd = cpu_to_be16(lo_sidd);
+
+	return 0;
+}
+
+static int ci_populate_bapm_parameters_in_dpm_table(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	const struct ci_pt_defaults *pt_defaults = pi->powertune_defaults;
+	SMU7_Discrete_DpmTable  *dpm_table = &pi->smc_state_table;
+	struct radeon_cac_tdp_table *cac_tdp_table =
+		rdev->pm.dpm.dyn_state.cac_tdp_table;
+	struct radeon_ppm_table *ppm = rdev->pm.dpm.dyn_state.ppm_table;
+	int i, j, k;
+	const u16 *def1;
+	const u16 *def2;
+
+	dpm_table->DefaultTdp = cac_tdp_table->tdp * 256;
+	dpm_table->TargetTdp = cac_tdp_table->configurable_tdp * 256;
+
+	dpm_table->DTETjOffset = (u8)pi->dte_tj_offset;
+	dpm_table->GpuTjMax =
+		(u8)(pi->thermal_temp_setting.temperature_high / 1000);
+	dpm_table->GpuTjHyst = 8;
+
+	dpm_table->DTEAmbientTempBase = pt_defaults->dte_ambient_temp_base;
+
+	if (ppm) {
+		dpm_table->PPM_PkgPwrLimit = cpu_to_be16((u16)ppm->dgpu_tdp * 256 / 1000);
+		dpm_table->PPM_TemperatureLimit = cpu_to_be16((u16)ppm->tj_max * 256);
+	} else {
+		dpm_table->PPM_PkgPwrLimit = cpu_to_be16(0);
+		dpm_table->PPM_TemperatureLimit = cpu_to_be16(0);
+	}
+
+	dpm_table->BAPM_TEMP_GRADIENT = cpu_to_be32(pt_defaults->bapm_temp_gradient);
+	def1 = pt_defaults->bapmti_r;
+	def2 = pt_defaults->bapmti_rc;
+
+	for (i = 0; i < SMU7_DTE_ITERATIONS; i++) {
+		for (j = 0; j < SMU7_DTE_SOURCES; j++) {
+			for (k = 0; k < SMU7_DTE_SINKS; k++) {
+				dpm_table->BAPMTI_R[i][j][k] = cpu_to_be16(*def1);
+				dpm_table->BAPMTI_RC[i][j][k] = cpu_to_be16(*def2);
+				def1++;
+				def2++;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int ci_populate_pm_base(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 pm_fuse_table_offset;
+	int ret;
+
+	if (pi->caps_power_containment) {
+		ret = ci_read_smc_sram_dword(rdev,
+					     SMU7_FIRMWARE_HEADER_LOCATION +
+					     offsetof(SMU7_Firmware_Header, PmFuseTable),
+					     &pm_fuse_table_offset, pi->sram_end);
+		if (ret)
+			return ret;
+		ret = ci_populate_bapm_vddc_vid_sidd(rdev);
+		if (ret)
+			return ret;
+		ret = ci_populate_vddc_vid(rdev);
+		if (ret)
+			return ret;
+		ret = ci_populate_svi_load_line(rdev);
+		if (ret)
+			return ret;
+		ret = ci_populate_tdc_limit(rdev);
+		if (ret)
+			return ret;
+		ret = ci_populate_dw8(rdev);
+		if (ret)
+			return ret;
+		ret = ci_min_max_v_gnbl_pm_lid_from_bapm_vddc(rdev);
+		if (ret)
+			return ret;
+		ret = ci_populate_bapm_vddc_base_leakage_sidd(rdev);
+		if (ret)
+			return ret;
+		ret = ci_copy_bytes_to_smc(rdev, pm_fuse_table_offset,
+					   (u8 *)&pi->smc_powertune_table,
+					   sizeof(SMU7_Discrete_PmFuses), pi->sram_end);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void ci_do_enable_didt(struct radeon_device *rdev, const bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 data;
+
+	if (pi->caps_sq_ramping) {
+		data = RREG32_DIDT(DIDT_SQ_CTRL0);
+		if (enable)
+			data |= DIDT_CTRL_EN;
+		else
+			data &= ~DIDT_CTRL_EN;
+		WREG32_DIDT(DIDT_SQ_CTRL0, data);
+	}
+
+	if (pi->caps_db_ramping) {
+		data = RREG32_DIDT(DIDT_DB_CTRL0);
+		if (enable)
+			data |= DIDT_CTRL_EN;
+		else
+			data &= ~DIDT_CTRL_EN;
+		WREG32_DIDT(DIDT_DB_CTRL0, data);
+	}
+
+	if (pi->caps_td_ramping) {
+		data = RREG32_DIDT(DIDT_TD_CTRL0);
+		if (enable)
+			data |= DIDT_CTRL_EN;
+		else
+			data &= ~DIDT_CTRL_EN;
+		WREG32_DIDT(DIDT_TD_CTRL0, data);
+	}
+
+	if (pi->caps_tcp_ramping) {
+		data = RREG32_DIDT(DIDT_TCP_CTRL0);
+		if (enable)
+			data |= DIDT_CTRL_EN;
+		else
+			data &= ~DIDT_CTRL_EN;
+		WREG32_DIDT(DIDT_TCP_CTRL0, data);
+	}
+}
+
+static int ci_program_pt_config_registers(struct radeon_device *rdev,
+					  const struct ci_pt_config_reg *cac_config_regs)
+{
+	const struct ci_pt_config_reg *config_regs = cac_config_regs;
+	u32 data;
+	u32 cache = 0;
+
+	if (config_regs == NULL)
+		return -EINVAL;
+
+	while (config_regs->offset != 0xFFFFFFFF) {
+		if (config_regs->type == CISLANDS_CONFIGREG_CACHE) {
+			cache |= ((config_regs->value << config_regs->shift) & config_regs->mask);
+		} else {
+			switch (config_regs->type) {
+			case CISLANDS_CONFIGREG_SMC_IND:
+				data = RREG32_SMC(config_regs->offset);
+				break;
+			case CISLANDS_CONFIGREG_DIDT_IND:
+				data = RREG32_DIDT(config_regs->offset);
+				break;
+			default:
+				data = RREG32(config_regs->offset << 2);
+				break;
+			}
+
+			data &= ~config_regs->mask;
+			data |= ((config_regs->value << config_regs->shift) & config_regs->mask);
+			data |= cache;
+
+			switch (config_regs->type) {
+			case CISLANDS_CONFIGREG_SMC_IND:
+				WREG32_SMC(config_regs->offset, data);
+				break;
+			case CISLANDS_CONFIGREG_DIDT_IND:
+				WREG32_DIDT(config_regs->offset, data);
+				break;
+			default:
+				WREG32(config_regs->offset << 2, data);
+				break;
+			}
+			cache = 0;
+		}
+		config_regs++;
+	}
+	return 0;
+}
+
+static int ci_enable_didt(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	int ret;
+
+	if (pi->caps_sq_ramping || pi->caps_db_ramping ||
+	    pi->caps_td_ramping || pi->caps_tcp_ramping) {
+		cik_enter_rlc_safe_mode(rdev);
+
+		if (enable) {
+			ret = ci_program_pt_config_registers(rdev, didt_config_ci);
+			if (ret) {
+				cik_exit_rlc_safe_mode(rdev);
+				return ret;
+			}
+		}
+
+		ci_do_enable_didt(rdev, enable);
+
+		cik_exit_rlc_safe_mode(rdev);
+	}
+
+	return 0;
+}
+
+static int ci_enable_power_containment(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result;
+	int ret = 0;
+
+	if (enable) {
+		pi->power_containment_features = 0;
+		if (pi->caps_power_containment) {
+			if (pi->enable_bapm_feature) {
+				smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableDTE);
+				if (smc_result != PPSMC_Result_OK)
+					ret = -EINVAL;
+				else
+					pi->power_containment_features |= POWERCONTAINMENT_FEATURE_BAPM;
+			}
+
+			if (pi->enable_tdc_limit_feature) {
+				smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_TDCLimitEnable);
+				if (smc_result != PPSMC_Result_OK)
+					ret = -EINVAL;
+				else
+					pi->power_containment_features |= POWERCONTAINMENT_FEATURE_TDCLimit;
+			}
+
+			if (pi->enable_pkg_pwr_tracking_feature) {
+				smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PkgPwrLimitEnable);
+				if (smc_result != PPSMC_Result_OK) {
+					ret = -EINVAL;
+				} else {
+					struct radeon_cac_tdp_table *cac_tdp_table =
+						rdev->pm.dpm.dyn_state.cac_tdp_table;
+					u32 default_pwr_limit =
+						(u32)(cac_tdp_table->maximum_power_delivery_limit * 256);
+
+					pi->power_containment_features |= POWERCONTAINMENT_FEATURE_PkgPwrLimit;
+
+					ci_set_power_limit(rdev, default_pwr_limit);
+				}
+			}
+		}
+	} else {
+		if (pi->caps_power_containment && pi->power_containment_features) {
+			if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_TDCLimit)
+				ci_send_msg_to_smc(rdev, PPSMC_MSG_TDCLimitDisable);
+
+			if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_BAPM)
+				ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableDTE);
+
+			if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit)
+				ci_send_msg_to_smc(rdev, PPSMC_MSG_PkgPwrLimitDisable);
+			pi->power_containment_features = 0;
+		}
+	}
+
+	return ret;
+}
+
+static int ci_enable_smc_cac(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result;
+	int ret = 0;
+
+	if (pi->caps_cac) {
+		if (enable) {
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableCac);
+			if (smc_result != PPSMC_Result_OK) {
+				ret = -EINVAL;
+				pi->cac_enabled = false;
+			} else {
+				pi->cac_enabled = true;
+			}
+		} else if (pi->cac_enabled) {
+			ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableCac);
+			pi->cac_enabled = false;
+		}
+	}
+
+	return ret;
+}
+
+static int ci_power_control_set_level(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_cac_tdp_table *cac_tdp_table =
+		rdev->pm.dpm.dyn_state.cac_tdp_table;
+	s32 adjust_percent;
+	s32 target_tdp;
+	int ret = 0;
+	bool adjust_polarity = false; /* ??? */
+
+	if (pi->caps_power_containment &&
+	    (pi->power_containment_features & POWERCONTAINMENT_FEATURE_BAPM)) {
+		adjust_percent = adjust_polarity ?
+			rdev->pm.dpm.tdp_adjustment : (-1 * rdev->pm.dpm.tdp_adjustment);
+		target_tdp = ((100 + adjust_percent) *
+			      (s32)cac_tdp_table->configurable_tdp) / 100;
+		target_tdp *= 256;
+
+		ret = ci_set_overdrive_target_tdp(rdev, (u32)target_tdp);
+	}
+
+	return ret;
+}
+
+void ci_dpm_powergate_uvd(struct radeon_device *rdev, bool gate)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (pi->uvd_power_gated == gate)
+		return;
+
+	pi->uvd_power_gated = gate;
+
+	ci_update_uvd_dpm(rdev, gate);
+}
+
+bool ci_dpm_vblank_too_short(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 vblank_time = r600_dpm_get_vblank_time(rdev);
+	u32 switch_limit = pi->mem_gddr5 ? 450 : 300;
+
+	if (vblank_time < switch_limit)
+		return true;
+	else
+		return false;
+
+}
+
+static void ci_apply_state_adjust_rules(struct radeon_device *rdev,
+					struct radeon_ps *rps)
+{
+	struct ci_ps *ps = ci_get_ps(rps);
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_clock_and_voltage_limits *max_limits;
+	bool disable_mclk_switching;
+	u32 sclk, mclk;
+	int i;
+
+	if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
+	    ci_dpm_vblank_too_short(rdev))
+		disable_mclk_switching = true;
+	else
+		disable_mclk_switching = false;
+
+	if ((rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) == ATOM_PPLIB_CLASSIFICATION_UI_BATTERY)
+		pi->battery_state = true;
+	else
+		pi->battery_state = false;
+
+	if (rdev->pm.dpm.ac_power)
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+	else
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
+
+	if (rdev->pm.dpm.ac_power == false) {
+		for (i = 0; i < ps->performance_level_count; i++) {
+			if (ps->performance_levels[i].mclk > max_limits->mclk)
+				ps->performance_levels[i].mclk = max_limits->mclk;
+			if (ps->performance_levels[i].sclk > max_limits->sclk)
+				ps->performance_levels[i].sclk = max_limits->sclk;
+		}
+	}
+
+	/* XXX validate the min clocks required for display */
+
+	if (disable_mclk_switching) {
+		mclk  = ps->performance_levels[ps->performance_level_count - 1].mclk;
+		sclk = ps->performance_levels[0].sclk;
+	} else {
+		mclk = ps->performance_levels[0].mclk;
+		sclk = ps->performance_levels[0].sclk;
+	}
+
+	ps->performance_levels[0].sclk = sclk;
+	ps->performance_levels[0].mclk = mclk;
+
+	if (ps->performance_levels[1].sclk < ps->performance_levels[0].sclk)
+		ps->performance_levels[1].sclk = ps->performance_levels[0].sclk;
+
+	if (disable_mclk_switching) {
+		if (ps->performance_levels[0].mclk < ps->performance_levels[1].mclk)
+			ps->performance_levels[0].mclk = ps->performance_levels[1].mclk;
+	} else {
+		if (ps->performance_levels[1].mclk < ps->performance_levels[0].mclk)
+			ps->performance_levels[1].mclk = ps->performance_levels[0].mclk;
+	}
+}
+
+static int ci_set_thermal_temperature_range(struct radeon_device *rdev,
+					    int min_temp, int max_temp)
+{
+	int low_temp = 0 * 1000;
+	int high_temp = 255 * 1000;
+	u32 tmp;
+
+	if (low_temp < min_temp)
+		low_temp = min_temp;
+	if (high_temp > max_temp)
+		high_temp = max_temp;
+	if (high_temp < low_temp) {
+		DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp);
+		return -EINVAL;
+	}
+
+	tmp = RREG32_SMC(CG_THERMAL_INT);
+	tmp &= ~(CI_DIG_THERM_INTH_MASK | CI_DIG_THERM_INTL_MASK);
+	tmp |= CI_DIG_THERM_INTH(high_temp / 1000) |
+		CI_DIG_THERM_INTL(low_temp / 1000);
+	WREG32_SMC(CG_THERMAL_INT, tmp);
+
+#if 0
+	/* XXX: need to figure out how to handle this properly */
+	tmp = RREG32_SMC(CG_THERMAL_CTRL);
+	tmp &= DIG_THERM_DPM_MASK;
+	tmp |= DIG_THERM_DPM(high_temp / 1000);
+	WREG32_SMC(CG_THERMAL_CTRL, tmp);
+#endif
+
+	return 0;
+}
+
+#if 0
+static int ci_read_smc_soft_register(struct radeon_device *rdev,
+				     u16 reg_offset, u32 *value)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	return ci_read_smc_sram_dword(rdev,
+				      pi->soft_regs_start + reg_offset,
+				      value, pi->sram_end);
+}
+#endif
+
+static int ci_write_smc_soft_register(struct radeon_device *rdev,
+				      u16 reg_offset, u32 value)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	return ci_write_smc_sram_dword(rdev,
+				       pi->soft_regs_start + reg_offset,
+				       value, pi->sram_end);
+}
+
+static void ci_init_fps_limits(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	SMU7_Discrete_DpmTable *table = &pi->smc_state_table;
+
+	if (pi->caps_fps) {
+		u16 tmp;
+
+		tmp = 45;
+		table->FpsHighT = cpu_to_be16(tmp);
+
+		tmp = 30;
+		table->FpsLowT = cpu_to_be16(tmp);
+	}
+}
+
+static int ci_update_sclk_t(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	int ret = 0;
+	u32 low_sclk_interrupt_t = 0;
+
+	if (pi->caps_sclk_throttle_low_notification) {
+		low_sclk_interrupt_t = cpu_to_be32(pi->low_sclk_interrupt_t);
+
+		ret = ci_copy_bytes_to_smc(rdev,
+					   pi->dpm_table_start +
+					   offsetof(SMU7_Discrete_DpmTable, LowSclkInterruptT),
+					   (u8 *)&low_sclk_interrupt_t,
+					   sizeof(u32), pi->sram_end);
+
+	}
+
+	return ret;
+}
+
+static void ci_get_leakage_voltages(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u16 leakage_id, virtual_voltage_id;
+	u16 vddc, vddci;
+	int i;
+
+	pi->vddc_leakage.count = 0;
+	pi->vddci_leakage.count = 0;
+
+	if (radeon_atom_get_leakage_id_from_vbios(rdev, &leakage_id) == 0) {
+		for (i = 0; i < CISLANDS_MAX_LEAKAGE_COUNT; i++) {
+			virtual_voltage_id = ATOM_VIRTUAL_VOLTAGE_ID0 + i;
+			if (radeon_atom_get_leakage_vddc_based_on_leakage_params(rdev, &vddc, &vddci,
+										 virtual_voltage_id,
+										 leakage_id) == 0) {
+				if (vddc != 0 && vddc != virtual_voltage_id) {
+					pi->vddc_leakage.actual_voltage[pi->vddc_leakage.count] = vddc;
+					pi->vddc_leakage.leakage_id[pi->vddc_leakage.count] = virtual_voltage_id;
+					pi->vddc_leakage.count++;
+				}
+				if (vddci != 0 && vddci != virtual_voltage_id) {
+					pi->vddci_leakage.actual_voltage[pi->vddci_leakage.count] = vddci;
+					pi->vddci_leakage.leakage_id[pi->vddci_leakage.count] = virtual_voltage_id;
+					pi->vddci_leakage.count++;
+				}
+			}
+		}
+	}
+}
+
+static void ci_set_dpm_event_sources(struct radeon_device *rdev, u32 sources)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	bool want_thermal_protection;
+	enum radeon_dpm_event_src dpm_event_src;
+	u32 tmp;
+
+	switch (sources) {
+	case 0:
+	default:
+		want_thermal_protection = false;
+		break;
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL):
+		want_thermal_protection = true;
+		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGITAL;
+		break;
+	case (1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL):
+		want_thermal_protection = true;
+		dpm_event_src = RADEON_DPM_EVENT_SRC_EXTERNAL;
+		break;
+	case ((1 << RADEON_DPM_AUTO_THROTTLE_SRC_EXTERNAL) |
+	      (1 << RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL)):
+		want_thermal_protection = true;
+		dpm_event_src = RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL;
+		break;
+	}
+
+	if (want_thermal_protection) {
+#if 0
+		/* XXX: need to figure out how to handle this properly */
+		tmp = RREG32_SMC(CG_THERMAL_CTRL);
+		tmp &= DPM_EVENT_SRC_MASK;
+		tmp |= DPM_EVENT_SRC(dpm_event_src);
+		WREG32_SMC(CG_THERMAL_CTRL, tmp);
+#endif
+
+		tmp = RREG32_SMC(GENERAL_PWRMGT);
+		if (pi->thermal_protection)
+			tmp &= ~THERMAL_PROTECTION_DIS;
+		else
+			tmp |= THERMAL_PROTECTION_DIS;
+		WREG32_SMC(GENERAL_PWRMGT, tmp);
+	} else {
+		tmp = RREG32_SMC(GENERAL_PWRMGT);
+		tmp |= THERMAL_PROTECTION_DIS;
+		WREG32_SMC(GENERAL_PWRMGT, tmp);
+	}
+}
+
+static void ci_enable_auto_throttle_source(struct radeon_device *rdev,
+					   enum radeon_dpm_auto_throttle_src source,
+					   bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (enable) {
+		if (!(pi->active_auto_throttle_sources & (1 << source))) {
+			pi->active_auto_throttle_sources |= 1 << source;
+			ci_set_dpm_event_sources(rdev, pi->active_auto_throttle_sources);
+		}
+	} else {
+		if (pi->active_auto_throttle_sources & (1 << source)) {
+			pi->active_auto_throttle_sources &= ~(1 << source);
+			ci_set_dpm_event_sources(rdev, pi->active_auto_throttle_sources);
+		}
+	}
+}
+
+static void ci_enable_vr_hot_gpio_interrupt(struct radeon_device *rdev)
+{
+	if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_REGULATOR_HOT)
+		ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableVRHotGPIOInterrupt);
+}
+
+static int ci_unfreeze_sclk_mclk_dpm(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result;
+
+	if (!pi->need_update_smu7_dpm_table)
+		return 0;
+
+	if ((!pi->sclk_dpm_key_disabled) &&
+	    (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK))) {
+		smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	if ((!pi->mclk_dpm_key_disabled) &&
+	    (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) {
+		smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_UnfreezeLevel);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	pi->need_update_smu7_dpm_table = 0;
+	return 0;
+}
+
+static int ci_enable_sclk_mclk_dpm(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result;
+
+	if (enable) {
+		if (!pi->sclk_dpm_key_disabled) {
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_DPM_Enable);
+			if (smc_result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+
+		if (!pi->mclk_dpm_key_disabled) {
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_Enable);
+			if (smc_result != PPSMC_Result_OK)
+				return -EINVAL;
+
+			WREG32_P(MC_SEQ_CNTL_3, CAC_EN, ~CAC_EN);
+
+			WREG32_SMC(LCAC_MC0_CNTL, 0x05);
+			WREG32_SMC(LCAC_MC1_CNTL, 0x05);
+			WREG32_SMC(LCAC_CPL_CNTL, 0x100005);
+
+			udelay(10);
+
+			WREG32_SMC(LCAC_MC0_CNTL, 0x400005);
+			WREG32_SMC(LCAC_MC1_CNTL, 0x400005);
+			WREG32_SMC(LCAC_CPL_CNTL, 0x500005);
+		}
+	} else {
+		if (!pi->sclk_dpm_key_disabled) {
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_DPM_Disable);
+			if (smc_result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+
+		if (!pi->mclk_dpm_key_disabled) {
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_Disable);
+			if (smc_result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int ci_start_dpm(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result;
+	int ret;
+	u32 tmp;
+
+	tmp = RREG32_SMC(GENERAL_PWRMGT);
+	tmp |= GLOBAL_PWRMGT_EN;
+	WREG32_SMC(GENERAL_PWRMGT, tmp);
+
+	tmp = RREG32_SMC(SCLK_PWRMGT_CNTL);
+	tmp |= DYNAMIC_PM_EN;
+	WREG32_SMC(SCLK_PWRMGT_CNTL, tmp);
+
+	ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, VoltageChangeTimeout), 0x1000);
+
+	WREG32_P(BIF_LNCNT_RESET, 0, ~RESET_LNCNT_EN);
+
+	smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Voltage_Cntl_Enable);
+	if (smc_result != PPSMC_Result_OK)
+		return -EINVAL;
+
+	ret = ci_enable_sclk_mclk_dpm(rdev, true);
+	if (ret)
+		return ret;
+
+	if (!pi->pcie_dpm_key_disabled) {
+		smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_Enable);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ci_freeze_sclk_mclk_dpm(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result;
+
+	if (!pi->need_update_smu7_dpm_table)
+		return 0;
+
+	if ((!pi->sclk_dpm_key_disabled) &&
+	    (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK))) {
+		smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_SCLKDPM_FreezeLevel);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	if ((!pi->mclk_dpm_key_disabled) &&
+	    (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) {
+		smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_FreezeLevel);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ci_stop_dpm(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result;
+	int ret;
+	u32 tmp;
+
+	tmp = RREG32_SMC(GENERAL_PWRMGT);
+	tmp &= ~GLOBAL_PWRMGT_EN;
+	WREG32_SMC(GENERAL_PWRMGT, tmp);
+
+	tmp = RREG32(SCLK_PWRMGT_CNTL);
+	tmp &= ~DYNAMIC_PM_EN;
+	WREG32_SMC(SCLK_PWRMGT_CNTL, tmp);
+
+	if (!pi->pcie_dpm_key_disabled) {
+		smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_Disable);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	ret = ci_enable_sclk_mclk_dpm(rdev, false);
+	if (ret)
+		return ret;
+
+	smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_Voltage_Cntl_Disable);
+	if (smc_result != PPSMC_Result_OK)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void ci_enable_sclk_control(struct radeon_device *rdev, bool enable)
+{
+	u32 tmp = RREG32_SMC(SCLK_PWRMGT_CNTL);
+
+	if (enable)
+		tmp &= ~SCLK_PWRMGT_OFF;
+	else
+		tmp |= SCLK_PWRMGT_OFF;
+	WREG32_SMC(SCLK_PWRMGT_CNTL, tmp);
+}
+
+#if 0
+static int ci_notify_hw_of_power_source(struct radeon_device *rdev,
+					bool ac_power)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_cac_tdp_table *cac_tdp_table =
+		rdev->pm.dpm.dyn_state.cac_tdp_table;
+	u32 power_limit;
+
+	if (ac_power)
+		power_limit = (u32)(cac_tdp_table->maximum_power_delivery_limit * 256);
+	else
+		power_limit = (u32)(cac_tdp_table->battery_power_limit * 256);
+
+        ci_set_power_limit(rdev, power_limit);
+
+	if (pi->caps_automatic_dc_transition) {
+		if (ac_power)
+			ci_send_msg_to_smc(rdev, PPSMC_MSG_RunningOnAC);
+		else
+			ci_send_msg_to_smc(rdev, PPSMC_MSG_Remove_DC_Clamp);
+	}
+
+	return 0;
+}
+#endif
+
+static PPSMC_Result ci_send_msg_to_smc_with_parameter(struct radeon_device *rdev,
+						      PPSMC_Msg msg, u32 parameter)
+{
+	WREG32(SMC_MSG_ARG_0, parameter);
+	return ci_send_msg_to_smc(rdev, msg);
+}
+
+static PPSMC_Result ci_send_msg_to_smc_return_parameter(struct radeon_device *rdev,
+							PPSMC_Msg msg, u32 *parameter)
+{
+	PPSMC_Result smc_result;
+
+	smc_result = ci_send_msg_to_smc(rdev, msg);
+
+	if ((smc_result == PPSMC_Result_OK) && parameter)
+		*parameter = RREG32(SMC_MSG_ARG_0);
+
+	return smc_result;
+}
+
+static int ci_dpm_force_state_sclk(struct radeon_device *rdev, u32 n)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (!pi->sclk_dpm_key_disabled) {
+		PPSMC_Result smc_result =
+			ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, n);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ci_dpm_force_state_mclk(struct radeon_device *rdev, u32 n)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (!pi->mclk_dpm_key_disabled) {
+		PPSMC_Result smc_result =
+			ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_MCLKDPM_ForceState, n);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ci_dpm_force_state_pcie(struct radeon_device *rdev, u32 n)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (!pi->pcie_dpm_key_disabled) {
+		PPSMC_Result smc_result =
+			ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_PCIeDPM_ForceLevel, n);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ci_set_power_limit(struct radeon_device *rdev, u32 n)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (pi->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) {
+		PPSMC_Result smc_result =
+			ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_PkgPwrSetLimit, n);
+		if (smc_result != PPSMC_Result_OK)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ci_set_overdrive_target_tdp(struct radeon_device *rdev,
+				       u32 target_tdp)
+{
+	PPSMC_Result smc_result =
+		ci_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_OverDriveSetTargetTdp, target_tdp);
+	if (smc_result != PPSMC_Result_OK)
+		return -EINVAL;
+	return 0;
+}
+
+static int ci_set_boot_state(struct radeon_device *rdev)
+{
+	return ci_enable_sclk_mclk_dpm(rdev, false);
+}
+
+static u32 ci_get_average_sclk_freq(struct radeon_device *rdev)
+{
+	u32 sclk_freq;
+	PPSMC_Result smc_result =
+		ci_send_msg_to_smc_return_parameter(rdev,
+						    PPSMC_MSG_API_GetSclkFrequency,
+						    &sclk_freq);
+	if (smc_result != PPSMC_Result_OK)
+		sclk_freq = 0;
+
+	return sclk_freq;
+}
+
+static u32 ci_get_average_mclk_freq(struct radeon_device *rdev)
+{
+	u32 mclk_freq;
+	PPSMC_Result smc_result =
+		ci_send_msg_to_smc_return_parameter(rdev,
+						    PPSMC_MSG_API_GetMclkFrequency,
+						    &mclk_freq);
+	if (smc_result != PPSMC_Result_OK)
+		mclk_freq = 0;
+
+	return mclk_freq;
+}
+
+static void ci_dpm_start_smc(struct radeon_device *rdev)
+{
+	int i;
+
+	ci_program_jump_on_start(rdev);
+	ci_start_smc_clock(rdev);
+	ci_start_smc(rdev);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		if (RREG32_SMC(FIRMWARE_FLAGS) & INTERRUPTS_ENABLED)
+			break;
+	}
+}
+
+static void ci_dpm_stop_smc(struct radeon_device *rdev)
+{
+	ci_reset_smc(rdev);
+	ci_stop_smc_clock(rdev);
+}
+
+static int ci_process_firmware_header(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 tmp;
+	int ret;
+
+	ret = ci_read_smc_sram_dword(rdev,
+				     SMU7_FIRMWARE_HEADER_LOCATION +
+				     offsetof(SMU7_Firmware_Header, DpmTable),
+				     &tmp, pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->dpm_table_start = tmp;
+
+	ret = ci_read_smc_sram_dword(rdev,
+				     SMU7_FIRMWARE_HEADER_LOCATION +
+				     offsetof(SMU7_Firmware_Header, SoftRegisters),
+				     &tmp, pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->soft_regs_start = tmp;
+
+	ret = ci_read_smc_sram_dword(rdev,
+				     SMU7_FIRMWARE_HEADER_LOCATION +
+				     offsetof(SMU7_Firmware_Header, mcRegisterTable),
+				     &tmp, pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->mc_reg_table_start = tmp;
+
+	ret = ci_read_smc_sram_dword(rdev,
+				     SMU7_FIRMWARE_HEADER_LOCATION +
+				     offsetof(SMU7_Firmware_Header, FanTable),
+				     &tmp, pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->fan_table_start = tmp;
+
+	ret = ci_read_smc_sram_dword(rdev,
+				     SMU7_FIRMWARE_HEADER_LOCATION +
+				     offsetof(SMU7_Firmware_Header, mcArbDramTimingTable),
+				     &tmp, pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->arb_table_start = tmp;
+
+	return 0;
+}
+
+static void ci_read_clock_registers(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	pi->clock_registers.cg_spll_func_cntl =
+		RREG32_SMC(CG_SPLL_FUNC_CNTL);
+	pi->clock_registers.cg_spll_func_cntl_2 =
+		RREG32_SMC(CG_SPLL_FUNC_CNTL_2);
+	pi->clock_registers.cg_spll_func_cntl_3 =
+		RREG32_SMC(CG_SPLL_FUNC_CNTL_3);
+	pi->clock_registers.cg_spll_func_cntl_4 =
+		RREG32_SMC(CG_SPLL_FUNC_CNTL_4);
+	pi->clock_registers.cg_spll_spread_spectrum =
+		RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM);
+	pi->clock_registers.cg_spll_spread_spectrum_2 =
+		RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM_2);
+	pi->clock_registers.dll_cntl = RREG32(DLL_CNTL);
+	pi->clock_registers.mclk_pwrmgt_cntl = RREG32(MCLK_PWRMGT_CNTL);
+	pi->clock_registers.mpll_ad_func_cntl = RREG32(MPLL_AD_FUNC_CNTL);
+	pi->clock_registers.mpll_dq_func_cntl = RREG32(MPLL_DQ_FUNC_CNTL);
+	pi->clock_registers.mpll_func_cntl = RREG32(MPLL_FUNC_CNTL);
+	pi->clock_registers.mpll_func_cntl_1 = RREG32(MPLL_FUNC_CNTL_1);
+	pi->clock_registers.mpll_func_cntl_2 = RREG32(MPLL_FUNC_CNTL_2);
+	pi->clock_registers.mpll_ss1 = RREG32(MPLL_SS1);
+	pi->clock_registers.mpll_ss2 = RREG32(MPLL_SS2);
+}
+
+static void ci_init_sclk_t(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	pi->low_sclk_interrupt_t = 0;
+}
+
+static void ci_enable_thermal_protection(struct radeon_device *rdev,
+					 bool enable)
+{
+	u32 tmp = RREG32_SMC(GENERAL_PWRMGT);
+
+	if (enable)
+		tmp &= ~THERMAL_PROTECTION_DIS;
+	else
+		tmp |= THERMAL_PROTECTION_DIS;
+	WREG32_SMC(GENERAL_PWRMGT, tmp);
+}
+
+static void ci_enable_acpi_power_management(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(GENERAL_PWRMGT);
+
+	tmp |= STATIC_PM_EN;
+
+	WREG32_SMC(GENERAL_PWRMGT, tmp);
+}
+
+#if 0
+static int ci_enter_ulp_state(struct radeon_device *rdev)
+{
+
+	WREG32(SMC_MESSAGE_0, PPSMC_MSG_SwitchToMinimumPower);
+
+	udelay(25000);
+
+	return 0;
+}
+
+static int ci_exit_ulp_state(struct radeon_device *rdev)
+{
+	int i;
+
+	WREG32(SMC_MESSAGE_0, PPSMC_MSG_ResumeFromMinimumPower);
+
+	udelay(7000);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		if (RREG32(SMC_RESP_0) == 1)
+			break;
+		udelay(1000);
+	}
+
+	return 0;
+}
+#endif
+
+static int ci_notify_smc_display_change(struct radeon_device *rdev,
+					bool has_display)
+{
+	PPSMC_Msg msg = has_display ? PPSMC_MSG_HasDisplay : PPSMC_MSG_NoDisplay;
+
+	return (ci_send_msg_to_smc(rdev, msg) == PPSMC_Result_OK) ?  0 : -EINVAL;
+}
+
+static int ci_enable_ds_master_switch(struct radeon_device *rdev,
+				      bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (enable) {
+		if (pi->caps_sclk_ds) {
+			if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_ON) != PPSMC_Result_OK)
+				return -EINVAL;
+		} else {
+			if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_OFF) != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+	} else {
+		if (pi->caps_sclk_ds) {
+			if (ci_send_msg_to_smc(rdev, PPSMC_MSG_MASTER_DeepSleep_OFF) != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static void ci_program_display_gap(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(CG_DISPLAY_GAP_CNTL);
+	u32 pre_vbi_time_in_us;
+	u32 frame_time_in_us;
+	u32 ref_clock = rdev->clock.spll.reference_freq;
+	u32 refresh_rate = r600_dpm_get_vrefresh(rdev);
+	u32 vblank_time = r600_dpm_get_vblank_time(rdev);
+
+	tmp &= ~DISP_GAP_MASK;
+	if (rdev->pm.dpm.new_active_crtc_count > 0)
+		tmp |= DISP_GAP(R600_PM_DISPLAY_GAP_VBLANK_OR_WM);
+	else
+		tmp |= DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE);
+	WREG32_SMC(CG_DISPLAY_GAP_CNTL, tmp);
+
+	if (refresh_rate == 0)
+		refresh_rate = 60;
+	if (vblank_time == 0xffffffff)
+		vblank_time = 500;
+	frame_time_in_us = 1000000 / refresh_rate;
+	pre_vbi_time_in_us =
+		frame_time_in_us - 200 - vblank_time;
+	tmp = pre_vbi_time_in_us * (ref_clock / 100);
+
+	WREG32_SMC(CG_DISPLAY_GAP_CNTL2, tmp);
+	ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, PreVBlankGap), 0x64);
+	ci_write_smc_soft_register(rdev, offsetof(SMU7_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us));
+
+
+	ci_notify_smc_display_change(rdev, (rdev->pm.dpm.new_active_crtc_count == 1));
+
+}
+
+static void ci_enable_spread_spectrum(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 tmp;
+
+	if (enable) {
+		if (pi->caps_sclk_ss_support) {
+			tmp = RREG32_SMC(GENERAL_PWRMGT);
+			tmp |= DYN_SPREAD_SPECTRUM_EN;
+			WREG32_SMC(GENERAL_PWRMGT, tmp);
+		}
+	} else {
+		tmp = RREG32_SMC(CG_SPLL_SPREAD_SPECTRUM);
+		tmp &= ~SSEN;
+		WREG32_SMC(CG_SPLL_SPREAD_SPECTRUM, tmp);
+
+		tmp = RREG32_SMC(GENERAL_PWRMGT);
+		tmp &= ~DYN_SPREAD_SPECTRUM_EN;
+		WREG32_SMC(GENERAL_PWRMGT, tmp);
+	}
+}
+
+static void ci_program_sstp(struct radeon_device *rdev)
+{
+	WREG32_SMC(CG_SSP, (SSTU(R600_SSTU_DFLT) | SST(R600_SST_DFLT)));
+}
+
+static void ci_enable_display_gap(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(CG_DISPLAY_GAP_CNTL);
+
+        tmp &= ~(DISP_GAP_MASK | DISP_GAP_MCHG_MASK);
+        tmp |= (DISP_GAP(R600_PM_DISPLAY_GAP_IGNORE) |
+                DISP_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK));
+
+	WREG32_SMC(CG_DISPLAY_GAP_CNTL, tmp);
+}
+
+static void ci_program_vc(struct radeon_device *rdev)
+{
+	u32 tmp;
+
+	tmp = RREG32_SMC(SCLK_PWRMGT_CNTL);
+	tmp &= ~(RESET_SCLK_CNT | RESET_BUSY_CNT);
+	WREG32_SMC(SCLK_PWRMGT_CNTL, tmp);
+
+	WREG32_SMC(CG_FTV_0, CISLANDS_VRC_DFLT0);
+	WREG32_SMC(CG_FTV_1, CISLANDS_VRC_DFLT1);
+	WREG32_SMC(CG_FTV_2, CISLANDS_VRC_DFLT2);
+	WREG32_SMC(CG_FTV_3, CISLANDS_VRC_DFLT3);
+	WREG32_SMC(CG_FTV_4, CISLANDS_VRC_DFLT4);
+	WREG32_SMC(CG_FTV_5, CISLANDS_VRC_DFLT5);
+	WREG32_SMC(CG_FTV_6, CISLANDS_VRC_DFLT6);
+	WREG32_SMC(CG_FTV_7, CISLANDS_VRC_DFLT7);
+}
+
+static void ci_clear_vc(struct radeon_device *rdev)
+{
+	u32 tmp;
+
+	tmp = RREG32_SMC(SCLK_PWRMGT_CNTL);
+	tmp |= (RESET_SCLK_CNT | RESET_BUSY_CNT);
+	WREG32_SMC(SCLK_PWRMGT_CNTL, tmp);
+
+	WREG32_SMC(CG_FTV_0, 0);
+	WREG32_SMC(CG_FTV_1, 0);
+	WREG32_SMC(CG_FTV_2, 0);
+	WREG32_SMC(CG_FTV_3, 0);
+	WREG32_SMC(CG_FTV_4, 0);
+	WREG32_SMC(CG_FTV_5, 0);
+	WREG32_SMC(CG_FTV_6, 0);
+	WREG32_SMC(CG_FTV_7, 0);
+}
+
+static int ci_upload_firmware(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	int i, ret;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		if (RREG32_SMC(RCU_UC_EVENTS) & BOOT_SEQ_DONE)
+			break;
+	}
+	WREG32_SMC(SMC_SYSCON_MISC_CNTL, 1);
+
+	ci_stop_smc_clock(rdev);
+	ci_reset_smc(rdev);
+
+	ret = ci_load_smc_ucode(rdev, pi->sram_end);
+
+	return ret;
+
+}
+
+static int ci_get_svi2_voltage_table(struct radeon_device *rdev,
+				     struct radeon_clock_voltage_dependency_table *voltage_dependency_table,
+				     struct atom_voltage_table *voltage_table)
+{
+	u32 i;
+
+	if (voltage_dependency_table == NULL)
+		return -EINVAL;
+
+	voltage_table->mask_low = 0;
+	voltage_table->phase_delay = 0;
+
+	voltage_table->count = voltage_dependency_table->count;
+	for (i = 0; i < voltage_table->count; i++) {
+		voltage_table->entries[i].value = voltage_dependency_table->entries[i].v;
+		voltage_table->entries[i].smio_low = 0;
+	}
+
+	return 0;
+}
+
+static int ci_construct_voltage_tables(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	int ret;
+
+	if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) {
+		ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_VDDC,
+						    VOLTAGE_OBJ_GPIO_LUT,
+						    &pi->vddc_voltage_table);
+		if (ret)
+			return ret;
+	} else if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) {
+		ret = ci_get_svi2_voltage_table(rdev,
+						&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
+						&pi->vddc_voltage_table);
+		if (ret)
+			return ret;
+	}
+
+	if (pi->vddc_voltage_table.count > SMU7_MAX_LEVELS_VDDC)
+		si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_VDDC,
+							 &pi->vddc_voltage_table);
+
+	if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) {
+		ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_VDDCI,
+						    VOLTAGE_OBJ_GPIO_LUT,
+						    &pi->vddci_voltage_table);
+		if (ret)
+			return ret;
+	} else if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) {
+		ret = ci_get_svi2_voltage_table(rdev,
+						&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
+						&pi->vddci_voltage_table);
+		if (ret)
+			return ret;
+	}
+
+	if (pi->vddci_voltage_table.count > SMU7_MAX_LEVELS_VDDCI)
+		si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_VDDCI,
+							 &pi->vddci_voltage_table);
+
+	if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO) {
+		ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_MVDDC,
+						    VOLTAGE_OBJ_GPIO_LUT,
+						    &pi->mvdd_voltage_table);
+		if (ret)
+			return ret;
+	} else if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2) {
+		ret = ci_get_svi2_voltage_table(rdev,
+						&rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk,
+						&pi->mvdd_voltage_table);
+		if (ret)
+			return ret;
+	}
+
+	if (pi->mvdd_voltage_table.count > SMU7_MAX_LEVELS_MVDD)
+		si_trim_voltage_table_to_fit_state_table(rdev, SMU7_MAX_LEVELS_MVDD,
+							 &pi->mvdd_voltage_table);
+
+	return 0;
+}
+
+static void ci_populate_smc_voltage_table(struct radeon_device *rdev,
+					  struct atom_voltage_table_entry *voltage_table,
+					  SMU7_Discrete_VoltageLevel *smc_voltage_table)
+{
+	int ret;
+
+	ret = ci_get_std_voltage_value_sidd(rdev, voltage_table,
+					    &smc_voltage_table->StdVoltageHiSidd,
+					    &smc_voltage_table->StdVoltageLoSidd);
+
+	if (ret) {
+		smc_voltage_table->StdVoltageHiSidd = voltage_table->value * VOLTAGE_SCALE;
+		smc_voltage_table->StdVoltageLoSidd = voltage_table->value * VOLTAGE_SCALE;
+	}
+
+	smc_voltage_table->Voltage = cpu_to_be16(voltage_table->value * VOLTAGE_SCALE);
+	smc_voltage_table->StdVoltageHiSidd =
+		cpu_to_be16(smc_voltage_table->StdVoltageHiSidd);
+	smc_voltage_table->StdVoltageLoSidd =
+		cpu_to_be16(smc_voltage_table->StdVoltageLoSidd);
+}
+
+static int ci_populate_smc_vddc_table(struct radeon_device *rdev,
+				      SMU7_Discrete_DpmTable *table)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	unsigned int count;
+
+	table->VddcLevelCount = pi->vddc_voltage_table.count;
+	for (count = 0; count < table->VddcLevelCount; count++) {
+		ci_populate_smc_voltage_table(rdev,
+					      &pi->vddc_voltage_table.entries[count],
+					      &table->VddcLevel[count]);
+
+		if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO)
+			table->VddcLevel[count].Smio |=
+				pi->vddc_voltage_table.entries[count].smio_low;
+		else
+			table->VddcLevel[count].Smio = 0;
+	}
+	table->VddcLevelCount = cpu_to_be32(table->VddcLevelCount);
+
+	return 0;
+}
+
+static int ci_populate_smc_vddci_table(struct radeon_device *rdev,
+				       SMU7_Discrete_DpmTable *table)
+{
+	unsigned int count;
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	table->VddciLevelCount = pi->vddci_voltage_table.count;
+	for (count = 0; count < table->VddciLevelCount; count++) {
+		ci_populate_smc_voltage_table(rdev,
+					      &pi->vddci_voltage_table.entries[count],
+					      &table->VddciLevel[count]);
+
+		if (pi->vddci_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO)
+			table->VddciLevel[count].Smio |=
+				pi->vddci_voltage_table.entries[count].smio_low;
+		else
+			table->VddciLevel[count].Smio = 0;
+	}
+	table->VddciLevelCount = cpu_to_be32(table->VddciLevelCount);
+
+	return 0;
+}
+
+static int ci_populate_smc_mvdd_table(struct radeon_device *rdev,
+				      SMU7_Discrete_DpmTable *table)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	unsigned int count;
+
+	table->MvddLevelCount = pi->mvdd_voltage_table.count;
+	for (count = 0; count < table->MvddLevelCount; count++) {
+		ci_populate_smc_voltage_table(rdev,
+					      &pi->mvdd_voltage_table.entries[count],
+					      &table->MvddLevel[count]);
+
+		if (pi->mvdd_control == CISLANDS_VOLTAGE_CONTROL_BY_GPIO)
+			table->MvddLevel[count].Smio |=
+				pi->mvdd_voltage_table.entries[count].smio_low;
+		else
+			table->MvddLevel[count].Smio = 0;
+	}
+	table->MvddLevelCount = cpu_to_be32(table->MvddLevelCount);
+
+	return 0;
+}
+
+static int ci_populate_smc_voltage_tables(struct radeon_device *rdev,
+					  SMU7_Discrete_DpmTable *table)
+{
+	int ret;
+
+	ret = ci_populate_smc_vddc_table(rdev, table);
+	if (ret)
+		return ret;
+
+	ret = ci_populate_smc_vddci_table(rdev, table);
+	if (ret)
+		return ret;
+
+	ret = ci_populate_smc_mvdd_table(rdev, table);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int ci_populate_mvdd_value(struct radeon_device *rdev, u32 mclk,
+				  SMU7_Discrete_VoltageLevel *voltage)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 i = 0;
+
+	if (pi->mvdd_control != CISLANDS_VOLTAGE_CONTROL_NONE) {
+		for (i = 0; i < rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.count; i++) {
+			if (mclk <= rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries[i].clk) {
+				voltage->Voltage = pi->mvdd_voltage_table.entries[i].value;
+				break;
+			}
+		}
+
+		if (i >= rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.count)
+			return -EINVAL;
+	}
+
+	return -EINVAL;
+}
+
+static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev,
+					 struct atom_voltage_table_entry *voltage_table,
+					 u16 *std_voltage_hi_sidd, u16 *std_voltage_lo_sidd)
+{
+	u16 v_index, idx;
+	bool voltage_found = false;
+	*std_voltage_hi_sidd = voltage_table->value * VOLTAGE_SCALE;
+	*std_voltage_lo_sidd = voltage_table->value * VOLTAGE_SCALE;
+
+	if (rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries == NULL)
+		return -EINVAL;
+
+	if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries) {
+		for (v_index = 0; (u32)v_index < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; v_index++) {
+			if (voltage_table->value ==
+			    rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[v_index].v) {
+				voltage_found = true;
+				if ((u32)v_index < rdev->pm.dpm.dyn_state.cac_leakage_table.count)
+					idx = v_index;
+				else
+					idx = rdev->pm.dpm.dyn_state.cac_leakage_table.count - 1;
+				*std_voltage_lo_sidd =
+					rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].vddc * VOLTAGE_SCALE;
+				*std_voltage_hi_sidd =
+					rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].leakage * VOLTAGE_SCALE;
+				break;
+			}
+		}
+
+		if (!voltage_found) {
+			for (v_index = 0; (u32)v_index < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; v_index++) {
+				if (voltage_table->value <=
+				    rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[v_index].v) {
+					voltage_found = true;
+					if ((u32)v_index < rdev->pm.dpm.dyn_state.cac_leakage_table.count)
+						idx = v_index;
+					else
+						idx = rdev->pm.dpm.dyn_state.cac_leakage_table.count - 1;
+					*std_voltage_lo_sidd =
+						rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].vddc * VOLTAGE_SCALE;
+					*std_voltage_hi_sidd =
+						rdev->pm.dpm.dyn_state.cac_leakage_table.entries[idx].leakage * VOLTAGE_SCALE;
+					break;
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void ci_populate_phase_value_based_on_sclk(struct radeon_device *rdev,
+						  const struct radeon_phase_shedding_limits_table *limits,
+						  u32 sclk,
+						  u32 *phase_shedding)
+{
+	unsigned int i;
+
+	*phase_shedding = 1;
+
+	for (i = 0; i < limits->count; i++) {
+		if (sclk < limits->entries[i].sclk) {
+			*phase_shedding = i;
+			break;
+		}
+	}
+}
+
+static void ci_populate_phase_value_based_on_mclk(struct radeon_device *rdev,
+						  const struct radeon_phase_shedding_limits_table *limits,
+						  u32 mclk,
+						  u32 *phase_shedding)
+{
+	unsigned int i;
+
+	*phase_shedding = 1;
+
+	for (i = 0; i < limits->count; i++) {
+		if (mclk < limits->entries[i].mclk) {
+			*phase_shedding = i;
+			break;
+		}
+	}
+}
+
+static int ci_init_arb_table_index(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 tmp;
+	int ret;
+
+	ret = ci_read_smc_sram_dword(rdev, pi->arb_table_start,
+				     &tmp, pi->sram_end);
+	if (ret)
+		return ret;
+
+	tmp &= 0x00FFFFFF;
+	tmp |= MC_CG_ARB_FREQ_F1 << 24;
+
+	return ci_write_smc_sram_dword(rdev, pi->arb_table_start,
+				       tmp, pi->sram_end);
+}
+
+static int ci_get_dependency_volt_by_clk(struct radeon_device *rdev,
+					 struct radeon_clock_voltage_dependency_table *allowed_clock_voltage_table,
+					 u32 clock, u32 *voltage)
+{
+	u32 i = 0;
+
+	if (allowed_clock_voltage_table->count == 0)
+		return -EINVAL;
+
+	for (i = 0; i < allowed_clock_voltage_table->count; i++) {
+		if (allowed_clock_voltage_table->entries[i].clk >= clock) {
+			*voltage = allowed_clock_voltage_table->entries[i].v;
+			return 0;
+		}
+	}
+
+	*voltage = allowed_clock_voltage_table->entries[i-1].v;
+
+	return 0;
+}
+
+static u8 ci_get_sleep_divider_id_from_clock(struct radeon_device *rdev,
+					     u32 sclk, u32 min_sclk_in_sr)
+{
+	u32 i;
+	u32 tmp;
+	u32 min = (min_sclk_in_sr > CISLAND_MINIMUM_ENGINE_CLOCK) ?
+		min_sclk_in_sr : CISLAND_MINIMUM_ENGINE_CLOCK;
+
+	if (sclk < min)
+		return 0;
+
+	for (i = CISLAND_MAX_DEEPSLEEP_DIVIDER_ID;  ; i--) {
+		tmp = sclk / (1 << i);
+		if (tmp >= min || i == 0)
+			break;
+	}
+
+	return (u8)i;
+}
+
+static int ci_initial_switch_from_arb_f0_to_f1(struct radeon_device *rdev)
+{
+	return ni_copy_and_switch_arb_sets(rdev, MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1);
+}
+
+static int ci_reset_to_default(struct radeon_device *rdev)
+{
+	return (ci_send_msg_to_smc(rdev, PPSMC_MSG_ResetToDefaults) == PPSMC_Result_OK) ?
+		0 : -EINVAL;
+}
+
+static int ci_force_switch_to_arb_f0(struct radeon_device *rdev)
+{
+	u32 tmp;
+
+	tmp = (RREG32_SMC(SMC_SCRATCH9) & 0x0000ff00) >> 8;
+
+	if (tmp == MC_CG_ARB_FREQ_F0)
+		return 0;
+
+	return ni_copy_and_switch_arb_sets(rdev, tmp, MC_CG_ARB_FREQ_F0);
+}
+
+static int ci_populate_memory_timing_parameters(struct radeon_device *rdev,
+						u32 sclk,
+						u32 mclk,
+						SMU7_Discrete_MCArbDramTimingTableEntry *arb_regs)
+{
+	u32 dram_timing;
+	u32 dram_timing2;
+	u32 burst_time;
+
+	radeon_atom_set_engine_dram_timings(rdev, sclk, mclk);
+
+	dram_timing  = RREG32(MC_ARB_DRAM_TIMING);
+	dram_timing2 = RREG32(MC_ARB_DRAM_TIMING2);
+	burst_time = RREG32(MC_ARB_BURST_TIME) & STATE0_MASK;
+
+	arb_regs->McArbDramTiming  = cpu_to_be32(dram_timing);
+	arb_regs->McArbDramTiming2 = cpu_to_be32(dram_timing2);
+	arb_regs->McArbBurstTime = (u8)burst_time;
+
+	return 0;
+}
+
+static int ci_do_program_memory_timing_parameters(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	SMU7_Discrete_MCArbDramTimingTable arb_regs;
+	u32 i, j;
+	int ret =  0;
+
+	memset(&arb_regs, 0, sizeof(SMU7_Discrete_MCArbDramTimingTable));
+
+	for (i = 0; i < pi->dpm_table.sclk_table.count; i++) {
+		for (j = 0; j < pi->dpm_table.mclk_table.count; j++) {
+			ret = ci_populate_memory_timing_parameters(rdev,
+								   pi->dpm_table.sclk_table.dpm_levels[i].value,
+								   pi->dpm_table.mclk_table.dpm_levels[j].value,
+								   &arb_regs.entries[i][j]);
+			if (ret)
+				break;
+		}
+	}
+
+	if (ret == 0)
+		ret = ci_copy_bytes_to_smc(rdev,
+					   pi->arb_table_start,
+					   (u8 *)&arb_regs,
+					   sizeof(SMU7_Discrete_MCArbDramTimingTable),
+					   pi->sram_end);
+
+	return ret;
+}
+
+static int ci_program_memory_timing_parameters(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (pi->need_update_smu7_dpm_table == 0)
+		return 0;
+
+	return ci_do_program_memory_timing_parameters(rdev);
+}
+
+static void ci_populate_smc_initial_state(struct radeon_device *rdev,
+					  struct radeon_ps *radeon_boot_state)
+{
+	struct ci_ps *boot_state = ci_get_ps(radeon_boot_state);
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 level = 0;
+
+	for (level = 0; level < rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.count; level++) {
+		if (rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[level].clk >=
+		    boot_state->performance_levels[0].sclk) {
+			pi->smc_state_table.GraphicsBootLevel = level;
+			break;
+		}
+	}
+
+	for (level = 0; level < rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.count; level++) {
+		if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries[level].clk >=
+		    boot_state->performance_levels[0].mclk) {
+			pi->smc_state_table.MemoryBootLevel = level;
+			break;
+		}
+	}
+}
+
+static u32 ci_get_dpm_level_enable_mask_value(struct ci_single_dpm_table *dpm_table)
+{
+	u32 i;
+	u32 mask_value = 0;
+
+	for (i = dpm_table->count; i > 0; i--) {
+		mask_value = mask_value << 1;
+		if (dpm_table->dpm_levels[i-1].enabled)
+			mask_value |= 0x1;
+		else
+			mask_value &= 0xFFFFFFFE;
+	}
+
+	return mask_value;
+}
+
+static void ci_populate_smc_link_level(struct radeon_device *rdev,
+				       SMU7_Discrete_DpmTable *table)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_dpm_table *dpm_table = &pi->dpm_table;
+	u32 i;
+
+	for (i = 0; i < dpm_table->pcie_speed_table.count; i++) {
+		table->LinkLevel[i].PcieGenSpeed =
+			(u8)dpm_table->pcie_speed_table.dpm_levels[i].value;
+		table->LinkLevel[i].PcieLaneCount =
+			r600_encode_pci_lane_width(dpm_table->pcie_speed_table.dpm_levels[i].param1);
+		table->LinkLevel[i].EnabledForActivity = 1;
+		table->LinkLevel[i].DownT = cpu_to_be32(5);
+		table->LinkLevel[i].UpT = cpu_to_be32(30);
+	}
+
+	pi->smc_state_table.LinkLevelCount = (u8)dpm_table->pcie_speed_table.count;
+	pi->dpm_level_enable_mask.pcie_dpm_enable_mask =
+		ci_get_dpm_level_enable_mask_value(&dpm_table->pcie_speed_table);
+}
+
+static int ci_populate_smc_uvd_level(struct radeon_device *rdev,
+				     SMU7_Discrete_DpmTable *table)
+{
+	u32 count;
+	struct atom_clock_dividers dividers;
+	int ret = -EINVAL;
+
+	table->UvdLevelCount =
+		rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count;
+
+	for (count = 0; count < table->UvdLevelCount; count++) {
+		table->UvdLevel[count].VclkFrequency =
+			rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].vclk;
+		table->UvdLevel[count].DclkFrequency =
+			rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].dclk;
+		table->UvdLevel[count].MinVddc =
+			rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE;
+		table->UvdLevel[count].MinVddcPhases = 1;
+
+		ret = radeon_atom_get_clock_dividers(rdev,
+						     COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+						     table->UvdLevel[count].VclkFrequency, false, &dividers);
+		if (ret)
+			return ret;
+
+		table->UvdLevel[count].VclkDivider = (u8)dividers.post_divider;
+
+		ret = radeon_atom_get_clock_dividers(rdev,
+						     COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+						     table->UvdLevel[count].DclkFrequency, false, &dividers);
+		if (ret)
+			return ret;
+
+		table->UvdLevel[count].DclkDivider = (u8)dividers.post_divider;
+
+		table->UvdLevel[count].VclkFrequency = cpu_to_be32(table->UvdLevel[count].VclkFrequency);
+		table->UvdLevel[count].DclkFrequency = cpu_to_be32(table->UvdLevel[count].DclkFrequency);
+		table->UvdLevel[count].MinVddc = cpu_to_be16(table->UvdLevel[count].MinVddc);
+	}
+
+	return ret;
+}
+
+static int ci_populate_smc_vce_level(struct radeon_device *rdev,
+				     SMU7_Discrete_DpmTable *table)
+{
+	u32 count;
+	struct atom_clock_dividers dividers;
+	int ret = -EINVAL;
+
+	table->VceLevelCount =
+		rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count;
+
+	for (count = 0; count < table->VceLevelCount; count++) {
+		table->VceLevel[count].Frequency =
+			rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[count].evclk;
+		table->VceLevel[count].MinVoltage =
+			(u16)rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE;
+		table->VceLevel[count].MinPhases = 1;
+
+		ret = radeon_atom_get_clock_dividers(rdev,
+						     COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+						     table->VceLevel[count].Frequency, false, &dividers);
+		if (ret)
+			return ret;
+
+		table->VceLevel[count].Divider = (u8)dividers.post_divider;
+
+		table->VceLevel[count].Frequency = cpu_to_be32(table->VceLevel[count].Frequency);
+		table->VceLevel[count].MinVoltage = cpu_to_be16(table->VceLevel[count].MinVoltage);
+	}
+
+	return ret;
+
+}
+
+static int ci_populate_smc_acp_level(struct radeon_device *rdev,
+				     SMU7_Discrete_DpmTable *table)
+{
+	u32 count;
+	struct atom_clock_dividers dividers;
+	int ret = -EINVAL;
+
+	table->AcpLevelCount = (u8)
+		(rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count);
+
+	for (count = 0; count < table->AcpLevelCount; count++) {
+		table->AcpLevel[count].Frequency =
+			rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[count].clk;
+		table->AcpLevel[count].MinVoltage =
+			rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[count].v;
+		table->AcpLevel[count].MinPhases = 1;
+
+		ret = radeon_atom_get_clock_dividers(rdev,
+						     COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+						     table->AcpLevel[count].Frequency, false, &dividers);
+		if (ret)
+			return ret;
+
+		table->AcpLevel[count].Divider = (u8)dividers.post_divider;
+
+		table->AcpLevel[count].Frequency = cpu_to_be32(table->AcpLevel[count].Frequency);
+		table->AcpLevel[count].MinVoltage = cpu_to_be16(table->AcpLevel[count].MinVoltage);
+	}
+
+	return ret;
+}
+
+static int ci_populate_smc_samu_level(struct radeon_device *rdev,
+				      SMU7_Discrete_DpmTable *table)
+{
+	u32 count;
+	struct atom_clock_dividers dividers;
+	int ret = -EINVAL;
+
+	table->SamuLevelCount =
+		rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count;
+
+	for (count = 0; count < table->SamuLevelCount; count++) {
+		table->SamuLevel[count].Frequency =
+			rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[count].clk;
+		table->SamuLevel[count].MinVoltage =
+			rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[count].v * VOLTAGE_SCALE;
+		table->SamuLevel[count].MinPhases = 1;
+
+		ret = radeon_atom_get_clock_dividers(rdev,
+						     COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+						     table->SamuLevel[count].Frequency, false, &dividers);
+		if (ret)
+			return ret;
+
+		table->SamuLevel[count].Divider = (u8)dividers.post_divider;
+
+		table->SamuLevel[count].Frequency = cpu_to_be32(table->SamuLevel[count].Frequency);
+		table->SamuLevel[count].MinVoltage = cpu_to_be16(table->SamuLevel[count].MinVoltage);
+	}
+
+	return ret;
+}
+
+static int ci_calculate_mclk_params(struct radeon_device *rdev,
+				    u32 memory_clock,
+				    SMU7_Discrete_MemoryLevel *mclk,
+				    bool strobe_mode,
+				    bool dll_state_on)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32  dll_cntl = pi->clock_registers.dll_cntl;
+	u32  mclk_pwrmgt_cntl = pi->clock_registers.mclk_pwrmgt_cntl;
+	u32  mpll_ad_func_cntl = pi->clock_registers.mpll_ad_func_cntl;
+	u32  mpll_dq_func_cntl = pi->clock_registers.mpll_dq_func_cntl;
+	u32  mpll_func_cntl = pi->clock_registers.mpll_func_cntl;
+	u32  mpll_func_cntl_1 = pi->clock_registers.mpll_func_cntl_1;
+	u32  mpll_func_cntl_2 = pi->clock_registers.mpll_func_cntl_2;
+	u32  mpll_ss1 = pi->clock_registers.mpll_ss1;
+	u32  mpll_ss2 = pi->clock_registers.mpll_ss2;
+	struct atom_mpll_param mpll_param;
+	int ret;
+
+	ret = radeon_atom_get_memory_pll_dividers(rdev, memory_clock, strobe_mode, &mpll_param);
+	if (ret)
+		return ret;
+
+	mpll_func_cntl &= ~BWCTRL_MASK;
+	mpll_func_cntl |= BWCTRL(mpll_param.bwcntl);
+
+	mpll_func_cntl_1 &= ~(CLKF_MASK | CLKFRAC_MASK | VCO_MODE_MASK);
+	mpll_func_cntl_1 |= CLKF(mpll_param.clkf) |
+		CLKFRAC(mpll_param.clkfrac) | VCO_MODE(mpll_param.vco_mode);
+
+	mpll_ad_func_cntl &= ~YCLK_POST_DIV_MASK;
+	mpll_ad_func_cntl |= YCLK_POST_DIV(mpll_param.post_div);
+
+	if (pi->mem_gddr5) {
+		mpll_dq_func_cntl &= ~(YCLK_SEL_MASK | YCLK_POST_DIV_MASK);
+		mpll_dq_func_cntl |= YCLK_SEL(mpll_param.yclk_sel) |
+			YCLK_POST_DIV(mpll_param.post_div);
+	}
+
+	if (pi->caps_mclk_ss_support) {
+		struct radeon_atom_ss ss;
+		u32 freq_nom;
+		u32 tmp;
+		u32 reference_clock = rdev->clock.mpll.reference_freq;
+
+		if (pi->mem_gddr5)
+			freq_nom = memory_clock * 4;
+		else
+			freq_nom = memory_clock * 2;
+
+		tmp = (freq_nom / reference_clock);
+		tmp = tmp * tmp;
+		if (radeon_atombios_get_asic_ss_info(rdev, &ss,
+						     ASIC_INTERNAL_MEMORY_SS, freq_nom)) {
+			u32 clks = reference_clock * 5 / ss.rate;
+			u32 clkv = (u32)((((131 * ss.percentage * ss.rate) / 100) * tmp) / freq_nom);
+
+			mpll_ss1 &= ~CLKV_MASK;
+			mpll_ss1 |= CLKV(clkv);
+
+			mpll_ss2 &= ~CLKS_MASK;
+			mpll_ss2 |= CLKS(clks);
+		}
+	}
+
+	mclk_pwrmgt_cntl &= ~DLL_SPEED_MASK;
+	mclk_pwrmgt_cntl |= DLL_SPEED(mpll_param.dll_speed);
+
+	if (dll_state_on)
+		mclk_pwrmgt_cntl |= MRDCK0_PDNB | MRDCK1_PDNB;
+	else
+		mclk_pwrmgt_cntl &= ~(MRDCK0_PDNB | MRDCK1_PDNB);
+
+	mclk->MclkFrequency = memory_clock;
+	mclk->MpllFuncCntl = mpll_func_cntl;
+	mclk->MpllFuncCntl_1 = mpll_func_cntl_1;
+	mclk->MpllFuncCntl_2 = mpll_func_cntl_2;
+	mclk->MpllAdFuncCntl = mpll_ad_func_cntl;
+	mclk->MpllDqFuncCntl = mpll_dq_func_cntl;
+	mclk->MclkPwrmgtCntl = mclk_pwrmgt_cntl;
+	mclk->DllCntl = dll_cntl;
+	mclk->MpllSs1 = mpll_ss1;
+	mclk->MpllSs2 = mpll_ss2;
+
+	return 0;
+}
+
+static int ci_populate_single_memory_level(struct radeon_device *rdev,
+					   u32 memory_clock,
+					   SMU7_Discrete_MemoryLevel *memory_level)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	int ret;
+	bool dll_state_on;
+
+	if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries) {
+		ret = ci_get_dependency_volt_by_clk(rdev,
+						    &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk,
+						    memory_clock, &memory_level->MinVddc);
+		if (ret)
+			return ret;
+	}
+
+	if (rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries) {
+		ret = ci_get_dependency_volt_by_clk(rdev,
+						    &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk,
+						    memory_clock, &memory_level->MinVddci);
+		if (ret)
+			return ret;
+	}
+
+	if (rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries) {
+		ret = ci_get_dependency_volt_by_clk(rdev,
+						    &rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk,
+						    memory_clock, &memory_level->MinMvdd);
+		if (ret)
+			return ret;
+	}
+
+	memory_level->MinVddcPhases = 1;
+
+	if (pi->vddc_phase_shed_control)
+		ci_populate_phase_value_based_on_mclk(rdev,
+						      &rdev->pm.dpm.dyn_state.phase_shedding_limits_table,
+						      memory_clock,
+						      &memory_level->MinVddcPhases);
+
+	memory_level->EnabledForThrottle = 1;
+	memory_level->EnabledForActivity = 1;
+	memory_level->UpH = 0;
+	memory_level->DownH = 100;
+	memory_level->VoltageDownH = 0;
+	memory_level->ActivityLevel = (u16)pi->mclk_activity_target;
+
+	memory_level->StutterEnable = false;
+	memory_level->StrobeEnable = false;
+	memory_level->EdcReadEnable = false;
+	memory_level->EdcWriteEnable = false;
+	memory_level->RttEnable = false;
+
+	memory_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
+
+	if (pi->mclk_stutter_mode_threshold &&
+	    (memory_clock <= pi->mclk_stutter_mode_threshold) &&
+	    (pi->uvd_enabled == false) &&
+	    (RREG32(DPG_PIPE_STUTTER_CONTROL) & STUTTER_ENABLE) &&
+	    (rdev->pm.dpm.new_active_crtc_count <= 2))
+		memory_level->StutterEnable = true;
+
+	if (pi->mclk_strobe_mode_threshold &&
+	    (memory_clock <= pi->mclk_strobe_mode_threshold))
+		memory_level->StrobeEnable = 1;
+
+	if (pi->mem_gddr5) {
+		memory_level->StrobeRatio =
+			si_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable);
+		if (pi->mclk_edc_enable_threshold &&
+		    (memory_clock > pi->mclk_edc_enable_threshold))
+			memory_level->EdcReadEnable = true;
+
+		if (pi->mclk_edc_wr_enable_threshold &&
+		    (memory_clock > pi->mclk_edc_wr_enable_threshold))
+			memory_level->EdcWriteEnable = true;
+
+		if (memory_level->StrobeEnable) {
+			if (si_get_mclk_frequency_ratio(memory_clock, true) >=
+			    ((RREG32(MC_SEQ_MISC7) >> 16) & 0xf))
+				dll_state_on = ((RREG32(MC_SEQ_MISC5) >> 1) & 0x1) ? true : false;
+			else
+				dll_state_on = ((RREG32(MC_SEQ_MISC6) >> 1) & 0x1) ? true : false;
+		} else {
+			dll_state_on = pi->dll_default_on;
+		}
+	} else {
+		memory_level->StrobeRatio = si_get_ddr3_mclk_frequency_ratio(memory_clock);
+		dll_state_on = ((RREG32(MC_SEQ_MISC5) >> 1) & 0x1) ? true : false;
+	}
+
+	ret = ci_calculate_mclk_params(rdev, memory_clock, memory_level, memory_level->StrobeEnable, dll_state_on);
+	if (ret)
+		return ret;
+
+	memory_level->MinVddc = cpu_to_be32(memory_level->MinVddc * VOLTAGE_SCALE);
+	memory_level->MinVddcPhases = cpu_to_be32(memory_level->MinVddcPhases);
+        memory_level->MinVddci = cpu_to_be32(memory_level->MinVddci * VOLTAGE_SCALE);
+        memory_level->MinMvdd = cpu_to_be32(memory_level->MinMvdd * VOLTAGE_SCALE);
+
+	memory_level->MclkFrequency = cpu_to_be32(memory_level->MclkFrequency);
+	memory_level->ActivityLevel = cpu_to_be16(memory_level->ActivityLevel);
+	memory_level->MpllFuncCntl = cpu_to_be32(memory_level->MpllFuncCntl);
+	memory_level->MpllFuncCntl_1 = cpu_to_be32(memory_level->MpllFuncCntl_1);
+	memory_level->MpllFuncCntl_2 = cpu_to_be32(memory_level->MpllFuncCntl_2);
+	memory_level->MpllAdFuncCntl = cpu_to_be32(memory_level->MpllAdFuncCntl);
+	memory_level->MpllDqFuncCntl = cpu_to_be32(memory_level->MpllDqFuncCntl);
+	memory_level->MclkPwrmgtCntl = cpu_to_be32(memory_level->MclkPwrmgtCntl);
+	memory_level->DllCntl = cpu_to_be32(memory_level->DllCntl);
+	memory_level->MpllSs1 = cpu_to_be32(memory_level->MpllSs1);
+	memory_level->MpllSs2 = cpu_to_be32(memory_level->MpllSs2);
+
+	return 0;
+}
+
+static int ci_populate_smc_acpi_level(struct radeon_device *rdev,
+				      SMU7_Discrete_DpmTable *table)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct atom_clock_dividers dividers;
+	SMU7_Discrete_VoltageLevel voltage_level;
+	u32 spll_func_cntl = pi->clock_registers.cg_spll_func_cntl;
+	u32 spll_func_cntl_2 = pi->clock_registers.cg_spll_func_cntl_2;
+	u32 dll_cntl = pi->clock_registers.dll_cntl;
+	u32 mclk_pwrmgt_cntl = pi->clock_registers.mclk_pwrmgt_cntl;
+	int ret;
+
+	table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC;
+
+	if (pi->acpi_vddc)
+		table->ACPILevel.MinVddc = cpu_to_be32(pi->acpi_vddc * VOLTAGE_SCALE);
+	else
+		table->ACPILevel.MinVddc = cpu_to_be32(pi->min_vddc_in_pp_table * VOLTAGE_SCALE);
+
+	table->ACPILevel.MinVddcPhases = pi->vddc_phase_shed_control ? 0 : 1;
+
+	table->ACPILevel.SclkFrequency = rdev->clock.spll.reference_freq;
+
+	ret = radeon_atom_get_clock_dividers(rdev,
+					     COMPUTE_GPUCLK_INPUT_FLAG_SCLK,
+					     table->ACPILevel.SclkFrequency, false, &dividers);
+	if (ret)
+		return ret;
+
+	table->ACPILevel.SclkDid = (u8)dividers.post_divider;
+	table->ACPILevel.DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
+	table->ACPILevel.DeepSleepDivId = 0;
+
+	spll_func_cntl &= ~SPLL_PWRON;
+	spll_func_cntl |= SPLL_RESET;
+
+	spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
+	spll_func_cntl_2 |= SCLK_MUX_SEL(4);
+
+	table->ACPILevel.CgSpllFuncCntl = spll_func_cntl;
+	table->ACPILevel.CgSpllFuncCntl2 = spll_func_cntl_2;
+	table->ACPILevel.CgSpllFuncCntl3 = pi->clock_registers.cg_spll_func_cntl_3;
+	table->ACPILevel.CgSpllFuncCntl4 = pi->clock_registers.cg_spll_func_cntl_4;
+	table->ACPILevel.SpllSpreadSpectrum = pi->clock_registers.cg_spll_spread_spectrum;
+	table->ACPILevel.SpllSpreadSpectrum2 = pi->clock_registers.cg_spll_spread_spectrum_2;
+	table->ACPILevel.CcPwrDynRm = 0;
+	table->ACPILevel.CcPwrDynRm1 = 0;
+
+	table->ACPILevel.Flags = cpu_to_be32(table->ACPILevel.Flags);
+	table->ACPILevel.MinVddcPhases = cpu_to_be32(table->ACPILevel.MinVddcPhases);
+	table->ACPILevel.SclkFrequency = cpu_to_be32(table->ACPILevel.SclkFrequency);
+	table->ACPILevel.CgSpllFuncCntl = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl);
+	table->ACPILevel.CgSpllFuncCntl2 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl2);
+	table->ACPILevel.CgSpllFuncCntl3 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl3);
+	table->ACPILevel.CgSpllFuncCntl4 = cpu_to_be32(table->ACPILevel.CgSpllFuncCntl4);
+	table->ACPILevel.SpllSpreadSpectrum = cpu_to_be32(table->ACPILevel.SpllSpreadSpectrum);
+	table->ACPILevel.SpllSpreadSpectrum2 = cpu_to_be32(table->ACPILevel.SpllSpreadSpectrum2);
+	table->ACPILevel.CcPwrDynRm = cpu_to_be32(table->ACPILevel.CcPwrDynRm);
+	table->ACPILevel.CcPwrDynRm1 = cpu_to_be32(table->ACPILevel.CcPwrDynRm1);
+
+	table->MemoryACPILevel.MinVddc = table->ACPILevel.MinVddc;
+	table->MemoryACPILevel.MinVddcPhases = table->ACPILevel.MinVddcPhases;
+
+	if (pi->vddci_control != CISLANDS_VOLTAGE_CONTROL_NONE) {
+		if (pi->acpi_vddci)
+			table->MemoryACPILevel.MinVddci =
+				cpu_to_be32(pi->acpi_vddci * VOLTAGE_SCALE);
+		else
+			table->MemoryACPILevel.MinVddci =
+				cpu_to_be32(pi->min_vddci_in_pp_table * VOLTAGE_SCALE);
+	}
+
+	if (ci_populate_mvdd_value(rdev, 0, &voltage_level))
+		table->MemoryACPILevel.MinMvdd = 0;
+	else
+		table->MemoryACPILevel.MinMvdd =
+			cpu_to_be32(voltage_level.Voltage * VOLTAGE_SCALE);
+
+	mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET;
+	mclk_pwrmgt_cntl &= ~(MRDCK0_PDNB | MRDCK1_PDNB);
+
+	dll_cntl &= ~(MRDCK0_BYPASS | MRDCK1_BYPASS);
+
+	table->MemoryACPILevel.DllCntl = cpu_to_be32(dll_cntl);
+	table->MemoryACPILevel.MclkPwrmgtCntl = cpu_to_be32(mclk_pwrmgt_cntl);
+	table->MemoryACPILevel.MpllAdFuncCntl =
+		cpu_to_be32(pi->clock_registers.mpll_ad_func_cntl);
+	table->MemoryACPILevel.MpllDqFuncCntl =
+		cpu_to_be32(pi->clock_registers.mpll_dq_func_cntl);
+	table->MemoryACPILevel.MpllFuncCntl =
+		cpu_to_be32(pi->clock_registers.mpll_func_cntl);
+	table->MemoryACPILevel.MpllFuncCntl_1 =
+		cpu_to_be32(pi->clock_registers.mpll_func_cntl_1);
+	table->MemoryACPILevel.MpllFuncCntl_2 =
+		cpu_to_be32(pi->clock_registers.mpll_func_cntl_2);
+	table->MemoryACPILevel.MpllSs1 = cpu_to_be32(pi->clock_registers.mpll_ss1);
+	table->MemoryACPILevel.MpllSs2 = cpu_to_be32(pi->clock_registers.mpll_ss2);
+
+	table->MemoryACPILevel.EnabledForThrottle = 0;
+	table->MemoryACPILevel.EnabledForActivity = 0;
+	table->MemoryACPILevel.UpH = 0;
+	table->MemoryACPILevel.DownH = 100;
+	table->MemoryACPILevel.VoltageDownH = 0;
+	table->MemoryACPILevel.ActivityLevel =
+		cpu_to_be16((u16)pi->mclk_activity_target);
+
+	table->MemoryACPILevel.StutterEnable = false;
+	table->MemoryACPILevel.StrobeEnable = false;
+	table->MemoryACPILevel.EdcReadEnable = false;
+	table->MemoryACPILevel.EdcWriteEnable = false;
+	table->MemoryACPILevel.RttEnable = false;
+
+	return 0;
+}
+
+
+static int ci_enable_ulv(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_ulv_parm *ulv = &pi->ulv;
+
+	if (ulv->supported) {
+		if (enable)
+			return (ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableULV) == PPSMC_Result_OK) ?
+				0 : -EINVAL;
+		else
+			return (ci_send_msg_to_smc(rdev, PPSMC_MSG_DisableULV) == PPSMC_Result_OK) ?
+				0 : -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ci_populate_ulv_level(struct radeon_device *rdev,
+				 SMU7_Discrete_Ulv *state)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u16 ulv_voltage = rdev->pm.dpm.backbias_response_time;
+
+	state->CcPwrDynRm = 0;
+	state->CcPwrDynRm1 = 0;
+
+	if (ulv_voltage == 0) {
+		pi->ulv.supported = false;
+		return 0;
+	}
+
+	if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_BY_SVID2) {
+		if (ulv_voltage > rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v)
+			state->VddcOffset = 0;
+		else
+			state->VddcOffset =
+				rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v - ulv_voltage;
+	} else {
+		if (ulv_voltage > rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v)
+			state->VddcOffsetVid = 0;
+		else
+			state->VddcOffsetVid = (u8)
+				((rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries[0].v - ulv_voltage) *
+				 VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1);
+	}
+	state->VddcPhase = pi->vddc_phase_shed_control ? 0 : 1;
+
+	state->CcPwrDynRm = cpu_to_be32(state->CcPwrDynRm);
+	state->CcPwrDynRm1 = cpu_to_be32(state->CcPwrDynRm1);
+	state->VddcOffset = cpu_to_be16(state->VddcOffset);
+
+	return 0;
+}
+
+static int ci_calculate_sclk_params(struct radeon_device *rdev,
+				    u32 engine_clock,
+				    SMU7_Discrete_GraphicsLevel *sclk)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct atom_clock_dividers dividers;
+	u32 spll_func_cntl_3 = pi->clock_registers.cg_spll_func_cntl_3;
+	u32 spll_func_cntl_4 = pi->clock_registers.cg_spll_func_cntl_4;
+	u32 cg_spll_spread_spectrum = pi->clock_registers.cg_spll_spread_spectrum;
+	u32 cg_spll_spread_spectrum_2 = pi->clock_registers.cg_spll_spread_spectrum_2;
+	u32 reference_clock = rdev->clock.spll.reference_freq;
+	u32 reference_divider;
+	u32 fbdiv;
+	int ret;
+
+	ret = radeon_atom_get_clock_dividers(rdev,
+					     COMPUTE_GPUCLK_INPUT_FLAG_SCLK,
+					     engine_clock, false, &dividers);
+	if (ret)
+		return ret;
+
+	reference_divider = 1 + dividers.ref_div;
+	fbdiv = dividers.fb_div & 0x3FFFFFF;
+
+	spll_func_cntl_3 &= ~SPLL_FB_DIV_MASK;
+	spll_func_cntl_3 |= SPLL_FB_DIV(fbdiv);
+        spll_func_cntl_3 |= SPLL_DITHEN;
+
+	if (pi->caps_sclk_ss_support) {
+		struct radeon_atom_ss ss;
+		u32 vco_freq = engine_clock * dividers.post_div;
+
+		if (radeon_atombios_get_asic_ss_info(rdev, &ss,
+						     ASIC_INTERNAL_ENGINE_SS, vco_freq)) {
+			u32 clk_s = reference_clock * 5 / (reference_divider * ss.rate);
+			u32 clk_v = 4 * ss.percentage * fbdiv / (clk_s * 10000);
+
+			cg_spll_spread_spectrum &= ~CLK_S_MASK;
+			cg_spll_spread_spectrum |= CLK_S(clk_s);
+			cg_spll_spread_spectrum |= SSEN;
+
+			cg_spll_spread_spectrum_2 &= ~CLK_V_MASK;
+			cg_spll_spread_spectrum_2 |= CLK_V(clk_v);
+		}
+	}
+
+	sclk->SclkFrequency = engine_clock;
+	sclk->CgSpllFuncCntl3 = spll_func_cntl_3;
+	sclk->CgSpllFuncCntl4 = spll_func_cntl_4;
+	sclk->SpllSpreadSpectrum = cg_spll_spread_spectrum;
+	sclk->SpllSpreadSpectrum2  = cg_spll_spread_spectrum_2;
+	sclk->SclkDid = (u8)dividers.post_divider;
+
+	return 0;
+}
+
+static int ci_populate_single_graphic_level(struct radeon_device *rdev,
+					    u32 engine_clock,
+					    u16 sclk_activity_level_t,
+					    SMU7_Discrete_GraphicsLevel *graphic_level)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	int ret;
+
+	ret = ci_calculate_sclk_params(rdev, engine_clock, graphic_level);
+	if (ret)
+		return ret;
+
+	ret = ci_get_dependency_volt_by_clk(rdev,
+					    &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk,
+					    engine_clock, &graphic_level->MinVddc);
+	if (ret)
+		return ret;
+
+	graphic_level->SclkFrequency = engine_clock;
+
+	graphic_level->Flags =  0;
+	graphic_level->MinVddcPhases = 1;
+
+	if (pi->vddc_phase_shed_control)
+		ci_populate_phase_value_based_on_sclk(rdev,
+						      &rdev->pm.dpm.dyn_state.phase_shedding_limits_table,
+						      engine_clock,
+						      &graphic_level->MinVddcPhases);
+
+	graphic_level->ActivityLevel = sclk_activity_level_t;
+
+	graphic_level->CcPwrDynRm = 0;
+	graphic_level->CcPwrDynRm1 = 0;
+	graphic_level->EnabledForActivity = 1;
+	graphic_level->EnabledForThrottle = 1;
+	graphic_level->UpH = 0;
+	graphic_level->DownH = 0;
+	graphic_level->VoltageDownH = 0;
+	graphic_level->PowerThrottle = 0;
+
+	if (pi->caps_sclk_ds)
+		graphic_level->DeepSleepDivId = ci_get_sleep_divider_id_from_clock(rdev,
+										   engine_clock,
+										   CISLAND_MINIMUM_ENGINE_CLOCK);
+
+	graphic_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
+
+	graphic_level->Flags = cpu_to_be32(graphic_level->Flags);
+        graphic_level->MinVddc = cpu_to_be32(graphic_level->MinVddc * VOLTAGE_SCALE);
+	graphic_level->MinVddcPhases = cpu_to_be32(graphic_level->MinVddcPhases);
+	graphic_level->SclkFrequency = cpu_to_be32(graphic_level->SclkFrequency);
+	graphic_level->ActivityLevel = cpu_to_be16(graphic_level->ActivityLevel);
+	graphic_level->CgSpllFuncCntl3 = cpu_to_be32(graphic_level->CgSpllFuncCntl3);
+	graphic_level->CgSpllFuncCntl4 = cpu_to_be32(graphic_level->CgSpllFuncCntl4);
+	graphic_level->SpllSpreadSpectrum = cpu_to_be32(graphic_level->SpllSpreadSpectrum);
+	graphic_level->SpllSpreadSpectrum2 = cpu_to_be32(graphic_level->SpllSpreadSpectrum2);
+	graphic_level->CcPwrDynRm = cpu_to_be32(graphic_level->CcPwrDynRm);
+	graphic_level->CcPwrDynRm1 = cpu_to_be32(graphic_level->CcPwrDynRm1);
+
+	return 0;
+}
+
+static int ci_populate_all_graphic_levels(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_dpm_table *dpm_table = &pi->dpm_table;
+	u32 level_array_address = pi->dpm_table_start +
+		offsetof(SMU7_Discrete_DpmTable, GraphicsLevel);
+	u32 level_array_size = sizeof(SMU7_Discrete_GraphicsLevel) *
+		SMU7_MAX_LEVELS_GRAPHICS;
+	SMU7_Discrete_GraphicsLevel *levels = pi->smc_state_table.GraphicsLevel;
+	u32 i, ret;
+
+	memset(levels, 0, level_array_size);
+
+	for (i = 0; i < dpm_table->sclk_table.count; i++) {
+		ret = ci_populate_single_graphic_level(rdev,
+						       dpm_table->sclk_table.dpm_levels[i].value,
+						       (u16)pi->activity_target[i],
+						       &pi->smc_state_table.GraphicsLevel[i]);
+		if (ret)
+			return ret;
+		if (i == (dpm_table->sclk_table.count - 1))
+			pi->smc_state_table.GraphicsLevel[i].DisplayWatermark =
+				PPSMC_DISPLAY_WATERMARK_HIGH;
+	}
+
+	pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count;
+	pi->dpm_level_enable_mask.sclk_dpm_enable_mask =
+		ci_get_dpm_level_enable_mask_value(&dpm_table->sclk_table);
+
+	ret = ci_copy_bytes_to_smc(rdev, level_array_address,
+				   (u8 *)levels, level_array_size,
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int ci_populate_ulv_state(struct radeon_device *rdev,
+				 SMU7_Discrete_Ulv *ulv_level)
+{
+	return ci_populate_ulv_level(rdev, ulv_level);
+}
+
+static int ci_populate_all_memory_levels(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_dpm_table *dpm_table = &pi->dpm_table;
+	u32 level_array_address = pi->dpm_table_start +
+		offsetof(SMU7_Discrete_DpmTable, MemoryLevel);
+	u32 level_array_size = sizeof(SMU7_Discrete_MemoryLevel) *
+		SMU7_MAX_LEVELS_MEMORY;
+	SMU7_Discrete_MemoryLevel *levels = pi->smc_state_table.MemoryLevel;
+	u32 i, ret;
+
+	memset(levels, 0, level_array_size);
+
+	for (i = 0; i < dpm_table->mclk_table.count; i++) {
+		if (dpm_table->mclk_table.dpm_levels[i].value == 0)
+			return -EINVAL;
+		ret = ci_populate_single_memory_level(rdev,
+						      dpm_table->mclk_table.dpm_levels[i].value,
+						      &pi->smc_state_table.MemoryLevel[i]);
+		if (ret)
+			return ret;
+	}
+
+	pi->smc_state_table.MemoryLevel[0].ActivityLevel = cpu_to_be16(0x1F);
+
+	pi->smc_state_table.MemoryDpmLevelCount = (u8)dpm_table->mclk_table.count;
+	pi->dpm_level_enable_mask.mclk_dpm_enable_mask =
+		ci_get_dpm_level_enable_mask_value(&dpm_table->mclk_table);
+
+	pi->smc_state_table.MemoryLevel[dpm_table->mclk_table.count - 1].DisplayWatermark =
+		PPSMC_DISPLAY_WATERMARK_HIGH;
+
+	ret = ci_copy_bytes_to_smc(rdev, level_array_address,
+				   (u8 *)levels, level_array_size,
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void ci_reset_single_dpm_table(struct radeon_device *rdev,
+				      struct ci_single_dpm_table* dpm_table,
+				      u32 count)
+{
+	u32 i;
+
+	dpm_table->count = count;
+	for (i = 0; i < MAX_REGULAR_DPM_NUMBER; i++)
+		dpm_table->dpm_levels[i].enabled = false;
+}
+
+static void ci_setup_pcie_table_entry(struct ci_single_dpm_table* dpm_table,
+				      u32 index, u32 pcie_gen, u32 pcie_lanes)
+{
+	dpm_table->dpm_levels[index].value = pcie_gen;
+	dpm_table->dpm_levels[index].param1 = pcie_lanes;
+	dpm_table->dpm_levels[index].enabled = true;
+}
+
+static int ci_setup_default_pcie_tables(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (!pi->use_pcie_performance_levels && !pi->use_pcie_powersaving_levels)
+		return -EINVAL;
+
+	if (pi->use_pcie_performance_levels && !pi->use_pcie_powersaving_levels) {
+		pi->pcie_gen_powersaving = pi->pcie_gen_performance;
+		pi->pcie_lane_powersaving = pi->pcie_lane_performance;
+	} else if (!pi->use_pcie_performance_levels && pi->use_pcie_powersaving_levels) {
+		pi->pcie_gen_performance = pi->pcie_gen_powersaving;
+		pi->pcie_lane_performance = pi->pcie_lane_powersaving;
+	}
+
+	ci_reset_single_dpm_table(rdev,
+				  &pi->dpm_table.pcie_speed_table,
+				  SMU7_MAX_LEVELS_LINK);
+
+	ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 0,
+				  pi->pcie_gen_powersaving.min,
+				  pi->pcie_lane_powersaving.min);
+	ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 1,
+				  pi->pcie_gen_performance.min,
+				  pi->pcie_lane_performance.min);
+	ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 2,
+				  pi->pcie_gen_powersaving.min,
+				  pi->pcie_lane_powersaving.max);
+	ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 3,
+				  pi->pcie_gen_performance.min,
+				  pi->pcie_lane_performance.max);
+	ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 4,
+				  pi->pcie_gen_powersaving.max,
+				  pi->pcie_lane_powersaving.max);
+	ci_setup_pcie_table_entry(&pi->dpm_table.pcie_speed_table, 5,
+				  pi->pcie_gen_performance.max,
+				  pi->pcie_lane_performance.max);
+
+	pi->dpm_table.pcie_speed_table.count = 6;
+
+	return 0;
+}
+
+static int ci_setup_default_dpm_tables(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_clock_voltage_dependency_table *allowed_sclk_vddc_table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+	struct radeon_clock_voltage_dependency_table *allowed_mclk_table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk;
+	struct radeon_cac_leakage_table *std_voltage_table =
+		&rdev->pm.dpm.dyn_state.cac_leakage_table;
+	u32 i;
+
+	if (allowed_sclk_vddc_table == NULL)
+		return -EINVAL;
+	if (allowed_sclk_vddc_table->count < 1)
+		return -EINVAL;
+	if (allowed_mclk_table == NULL)
+		return -EINVAL;
+	if (allowed_mclk_table->count < 1)
+		return -EINVAL;
+
+	memset(&pi->dpm_table, 0, sizeof(struct ci_dpm_table));
+
+	ci_reset_single_dpm_table(rdev,
+				  &pi->dpm_table.sclk_table,
+				  SMU7_MAX_LEVELS_GRAPHICS);
+	ci_reset_single_dpm_table(rdev,
+				  &pi->dpm_table.mclk_table,
+				  SMU7_MAX_LEVELS_MEMORY);
+	ci_reset_single_dpm_table(rdev,
+				  &pi->dpm_table.vddc_table,
+				  SMU7_MAX_LEVELS_VDDC);
+	ci_reset_single_dpm_table(rdev,
+				  &pi->dpm_table.vddci_table,
+				  SMU7_MAX_LEVELS_VDDCI);
+	ci_reset_single_dpm_table(rdev,
+				  &pi->dpm_table.mvdd_table,
+				  SMU7_MAX_LEVELS_MVDD);
+
+	pi->dpm_table.sclk_table.count = 0;
+	for (i = 0; i < allowed_sclk_vddc_table->count; i++) {
+		if ((i == 0) ||
+		    (pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count-1].value !=
+		     allowed_sclk_vddc_table->entries[i].clk)) {
+			pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].value =
+				allowed_sclk_vddc_table->entries[i].clk;
+			pi->dpm_table.sclk_table.dpm_levels[pi->dpm_table.sclk_table.count].enabled = true;
+			pi->dpm_table.sclk_table.count++;
+		}
+	}
+
+	pi->dpm_table.mclk_table.count = 0;
+	for (i = 0; i < allowed_mclk_table->count; i++) {
+		if ((i==0) ||
+		    (pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count-1].value !=
+		     allowed_mclk_table->entries[i].clk)) {
+			pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].value =
+				allowed_mclk_table->entries[i].clk;
+			pi->dpm_table.mclk_table.dpm_levels[pi->dpm_table.mclk_table.count].enabled = true;
+			pi->dpm_table.mclk_table.count++;
+		}
+	}
+
+	for (i = 0; i < allowed_sclk_vddc_table->count; i++) {
+		pi->dpm_table.vddc_table.dpm_levels[i].value =
+			allowed_sclk_vddc_table->entries[i].v;
+		pi->dpm_table.vddc_table.dpm_levels[i].param1 =
+			std_voltage_table->entries[i].leakage;
+		pi->dpm_table.vddc_table.dpm_levels[i].enabled = true;
+	}
+	pi->dpm_table.vddc_table.count = allowed_sclk_vddc_table->count;
+
+	allowed_mclk_table = &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk;
+	if (allowed_mclk_table) {
+		for (i = 0; i < allowed_mclk_table->count; i++) {
+			pi->dpm_table.vddci_table.dpm_levels[i].value =
+				allowed_mclk_table->entries[i].v;
+			pi->dpm_table.vddci_table.dpm_levels[i].enabled = true;
+		}
+		pi->dpm_table.vddci_table.count = allowed_mclk_table->count;
+	}
+
+	allowed_mclk_table = &rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk;
+	if (allowed_mclk_table) {
+		for (i = 0; i < allowed_mclk_table->count; i++) {
+			pi->dpm_table.mvdd_table.dpm_levels[i].value =
+				allowed_mclk_table->entries[i].v;
+			pi->dpm_table.mvdd_table.dpm_levels[i].enabled = true;
+		}
+		pi->dpm_table.mvdd_table.count = allowed_mclk_table->count;
+	}
+
+	ci_setup_default_pcie_tables(rdev);
+
+	return 0;
+}
+
+static int ci_find_boot_level(struct ci_single_dpm_table *table,
+			      u32 value, u32 *boot_level)
+{
+	u32 i;
+	int ret = -EINVAL;
+
+	for(i = 0; i < table->count; i++) {
+		if (value == table->dpm_levels[i].value) {
+			*boot_level = i;
+			ret = 0;
+		}
+	}
+
+	return ret;
+}
+
+static int ci_init_smc_table(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_ulv_parm *ulv = &pi->ulv;
+	struct radeon_ps *radeon_boot_state = rdev->pm.dpm.boot_ps;
+	SMU7_Discrete_DpmTable *table = &pi->smc_state_table;
+	int ret;
+
+	ret = ci_setup_default_dpm_tables(rdev);
+	if (ret)
+		return ret;
+
+	if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE)
+		ci_populate_smc_voltage_tables(rdev, table);
+
+	ci_init_fps_limits(rdev);
+
+	if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_HARDWAREDC)
+		table->SystemFlags |= PPSMC_SYSTEMFLAG_GPIO_DC;
+
+	if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC)
+		table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
+
+	if (pi->mem_gddr5)
+		table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
+
+	if (ulv->supported) {
+		ret = ci_populate_ulv_state(rdev, &pi->smc_state_table.Ulv);
+		if (ret)
+			return ret;
+		WREG32_SMC(CG_ULV_PARAMETER, ulv->cg_ulv_parameter);
+	}
+
+	ret = ci_populate_all_graphic_levels(rdev);
+	if (ret)
+		return ret;
+
+	ret = ci_populate_all_memory_levels(rdev);
+	if (ret)
+		return ret;
+
+	ci_populate_smc_link_level(rdev, table);
+
+	ret = ci_populate_smc_acpi_level(rdev, table);
+	if (ret)
+		return ret;
+
+	ret = ci_populate_smc_vce_level(rdev, table);
+	if (ret)
+		return ret;
+
+	ret = ci_populate_smc_acp_level(rdev, table);
+	if (ret)
+		return ret;
+
+	ret = ci_populate_smc_samu_level(rdev, table);
+	if (ret)
+		return ret;
+
+	ret = ci_do_program_memory_timing_parameters(rdev);
+	if (ret)
+		return ret;
+
+	ret = ci_populate_smc_uvd_level(rdev, table);
+	if (ret)
+		return ret;
+
+	table->UvdBootLevel  = 0;
+	table->VceBootLevel  = 0;
+	table->AcpBootLevel  = 0;
+	table->SamuBootLevel  = 0;
+	table->GraphicsBootLevel  = 0;
+	table->MemoryBootLevel  = 0;
+
+	ret = ci_find_boot_level(&pi->dpm_table.sclk_table,
+				 pi->vbios_boot_state.sclk_bootup_value,
+				 (u32 *)&pi->smc_state_table.GraphicsBootLevel);
+
+	ret = ci_find_boot_level(&pi->dpm_table.mclk_table,
+				 pi->vbios_boot_state.mclk_bootup_value,
+				 (u32 *)&pi->smc_state_table.MemoryBootLevel);
+
+	table->BootVddc = pi->vbios_boot_state.vddc_bootup_value;
+	table->BootVddci = pi->vbios_boot_state.vddci_bootup_value;
+	table->BootMVdd = pi->vbios_boot_state.mvdd_bootup_value;
+
+	ci_populate_smc_initial_state(rdev, radeon_boot_state);
+
+	ret = ci_populate_bapm_parameters_in_dpm_table(rdev);
+	if (ret)
+		return ret;
+
+	table->UVDInterval = 1;
+	table->VCEInterval = 1;
+	table->ACPInterval = 1;
+	table->SAMUInterval = 1;
+	table->GraphicsVoltageChangeEnable = 1;
+	table->GraphicsThermThrottleEnable = 1;
+	table->GraphicsInterval = 1;
+	table->VoltageInterval = 1;
+	table->ThermalInterval = 1;
+	table->TemperatureLimitHigh = (u16)((pi->thermal_temp_setting.temperature_high *
+					     CISLANDS_Q88_FORMAT_CONVERSION_UNIT) / 1000);
+	table->TemperatureLimitLow = (u16)((pi->thermal_temp_setting.temperature_low *
+					    CISLANDS_Q88_FORMAT_CONVERSION_UNIT) / 1000);
+	table->MemoryVoltageChangeEnable = 1;
+	table->MemoryInterval = 1;
+	table->VoltageResponseTime = 0;
+	table->VddcVddciDelta = 4000;
+	table->PhaseResponseTime = 0;
+	table->MemoryThermThrottleEnable = 1;
+	table->PCIeBootLinkLevel = 0;
+	table->PCIeGenInterval = 1;
+	if (pi->voltage_control == CISLANDS_VOLTAGE_CONTROL_BY_SVID2)
+		table->SVI2Enable  = 1;
+	else
+		table->SVI2Enable  = 0;
+
+	table->ThermGpio = 17;
+	table->SclkStepSize = 0x4000;
+
+	table->SystemFlags = cpu_to_be32(table->SystemFlags);
+	table->SmioMaskVddcVid = cpu_to_be32(table->SmioMaskVddcVid);
+	table->SmioMaskVddcPhase = cpu_to_be32(table->SmioMaskVddcPhase);
+	table->SmioMaskVddciVid = cpu_to_be32(table->SmioMaskVddciVid);
+	table->SmioMaskMvddVid = cpu_to_be32(table->SmioMaskMvddVid);
+	table->SclkStepSize = cpu_to_be32(table->SclkStepSize);
+	table->TemperatureLimitHigh = cpu_to_be16(table->TemperatureLimitHigh);
+	table->TemperatureLimitLow = cpu_to_be16(table->TemperatureLimitLow);
+	table->VddcVddciDelta = cpu_to_be16(table->VddcVddciDelta);
+	table->VoltageResponseTime = cpu_to_be16(table->VoltageResponseTime);
+	table->PhaseResponseTime = cpu_to_be16(table->PhaseResponseTime);
+	table->BootVddc = cpu_to_be16(table->BootVddc * VOLTAGE_SCALE);
+	table->BootVddci = cpu_to_be16(table->BootVddci * VOLTAGE_SCALE);
+	table->BootMVdd = cpu_to_be16(table->BootMVdd * VOLTAGE_SCALE);
+
+	ret = ci_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Discrete_DpmTable, SystemFlags),
+				   (u8 *)&table->SystemFlags,
+				   sizeof(SMU7_Discrete_DpmTable) - 3 * sizeof(SMU7_PIDController),
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void ci_trim_single_dpm_states(struct radeon_device *rdev,
+				      struct ci_single_dpm_table *dpm_table,
+				      u32 low_limit, u32 high_limit)
+{
+	u32 i;
+
+	for (i = 0; i < dpm_table->count; i++) {
+		if ((dpm_table->dpm_levels[i].value < low_limit) ||
+		    (dpm_table->dpm_levels[i].value > high_limit))
+			dpm_table->dpm_levels[i].enabled = false;
+		else
+			dpm_table->dpm_levels[i].enabled = true;
+	}
+}
+
+static void ci_trim_pcie_dpm_states(struct radeon_device *rdev,
+				    u32 speed_low, u32 lanes_low,
+				    u32 speed_high, u32 lanes_high)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table;
+	u32 i, j;
+
+	for (i = 0; i < pcie_table->count; i++) {
+		if ((pcie_table->dpm_levels[i].value < speed_low) ||
+		    (pcie_table->dpm_levels[i].param1 < lanes_low) ||
+		    (pcie_table->dpm_levels[i].value > speed_high) ||
+		    (pcie_table->dpm_levels[i].param1 > lanes_high))
+			pcie_table->dpm_levels[i].enabled = false;
+		else
+			pcie_table->dpm_levels[i].enabled = true;
+	}
+
+	for (i = 0; i < pcie_table->count; i++) {
+		if (pcie_table->dpm_levels[i].enabled) {
+			for (j = i + 1; j < pcie_table->count; j++) {
+				if (pcie_table->dpm_levels[j].enabled) {
+					if ((pcie_table->dpm_levels[i].value == pcie_table->dpm_levels[j].value) &&
+					    (pcie_table->dpm_levels[i].param1 == pcie_table->dpm_levels[j].param1))
+						pcie_table->dpm_levels[j].enabled = false;
+				}
+			}
+		}
+	}
+}
+
+static int ci_trim_dpm_states(struct radeon_device *rdev,
+			      struct radeon_ps *radeon_state)
+{
+	struct ci_ps *state = ci_get_ps(radeon_state);
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 high_limit_count;
+
+	if (state->performance_level_count < 1)
+		return -EINVAL;
+
+	if (state->performance_level_count == 1)
+		high_limit_count = 0;
+	else
+		high_limit_count = 1;
+
+	ci_trim_single_dpm_states(rdev,
+				  &pi->dpm_table.sclk_table,
+				  state->performance_levels[0].sclk,
+				  state->performance_levels[high_limit_count].sclk);
+
+	ci_trim_single_dpm_states(rdev,
+				  &pi->dpm_table.mclk_table,
+				  state->performance_levels[0].mclk,
+				  state->performance_levels[high_limit_count].mclk);
+
+	ci_trim_pcie_dpm_states(rdev,
+				state->performance_levels[0].pcie_gen,
+				state->performance_levels[0].pcie_lane,
+				state->performance_levels[high_limit_count].pcie_gen,
+				state->performance_levels[high_limit_count].pcie_lane);
+
+	return 0;
+}
+
+static int ci_apply_disp_minimum_voltage_request(struct radeon_device *rdev)
+{
+	struct radeon_clock_voltage_dependency_table *disp_voltage_table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk;
+	struct radeon_clock_voltage_dependency_table *vddc_table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+	u32 requested_voltage = 0;
+	u32 i;
+
+	if (disp_voltage_table == NULL)
+		return -EINVAL;
+	if (!disp_voltage_table->count)
+		return -EINVAL;
+
+	for (i = 0; i < disp_voltage_table->count; i++) {
+		if (rdev->clock.current_dispclk == disp_voltage_table->entries[i].clk)
+			requested_voltage = disp_voltage_table->entries[i].v;
+	}
+
+	for (i = 0; i < vddc_table->count; i++) {
+		if (requested_voltage <= vddc_table->entries[i].v) {
+			requested_voltage = vddc_table->entries[i].v;
+			return (ci_send_msg_to_smc_with_parameter(rdev,
+								  PPSMC_MSG_VddC_Request,
+								  requested_voltage * VOLTAGE_SCALE) == PPSMC_Result_OK) ?
+				0 : -EINVAL;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static int ci_upload_dpm_level_enable_mask(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result result;
+
+	if (!pi->sclk_dpm_key_disabled) {
+		if (pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
+			result = ci_send_msg_to_smc_with_parameter(rdev,
+								   PPSMC_MSG_SCLKDPM_SetEnabledMask,
+								   pi->dpm_level_enable_mask.sclk_dpm_enable_mask);
+			if (result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+	}
+
+	if (!pi->mclk_dpm_key_disabled) {
+		if (pi->dpm_level_enable_mask.mclk_dpm_enable_mask) {
+			result = ci_send_msg_to_smc_with_parameter(rdev,
+								   PPSMC_MSG_MCLKDPM_SetEnabledMask,
+								   pi->dpm_level_enable_mask.mclk_dpm_enable_mask);
+			if (result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+	}
+
+	if (!pi->pcie_dpm_key_disabled) {
+		if (pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
+			result = ci_send_msg_to_smc_with_parameter(rdev,
+								   PPSMC_MSG_PCIeDPM_SetEnabledMask,
+								   pi->dpm_level_enable_mask.pcie_dpm_enable_mask);
+			if (result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+	}
+
+	ci_apply_disp_minimum_voltage_request(rdev);
+
+	return 0;
+}
+
+static void ci_find_dpm_states_clocks_in_dpm_table(struct radeon_device *rdev,
+						   struct radeon_ps *radeon_state)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_ps *state = ci_get_ps(radeon_state);
+	struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table;
+	u32 sclk = state->performance_levels[state->performance_level_count-1].sclk;
+	struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table;
+	u32 mclk = state->performance_levels[state->performance_level_count-1].mclk;
+	u32 i;
+
+	pi->need_update_smu7_dpm_table = 0;
+
+	for (i = 0; i < sclk_table->count; i++) {
+		if (sclk == sclk_table->dpm_levels[i].value)
+			break;
+	}
+
+	if (i >= sclk_table->count) {
+		pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+	} else {
+		/* XXX check display min clock requirements */
+		if (0 != CISLAND_MINIMUM_ENGINE_CLOCK)
+			pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK;
+	}
+
+	for (i = 0; i < mclk_table->count; i++) {
+		if (mclk == mclk_table->dpm_levels[i].value)
+			break;
+	}
+
+	if (i >= mclk_table->count)
+		pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+
+	if (rdev->pm.dpm.current_active_crtc_count !=
+	    rdev->pm.dpm.new_active_crtc_count)
+		pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_MCLK;
+}
+
+static int ci_populate_and_upload_sclk_mclk_dpm_levels(struct radeon_device *rdev,
+						       struct radeon_ps *radeon_state)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_ps *state = ci_get_ps(radeon_state);
+	u32 sclk = state->performance_levels[state->performance_level_count-1].sclk;
+	u32 mclk = state->performance_levels[state->performance_level_count-1].mclk;
+	struct ci_dpm_table *dpm_table = &pi->dpm_table;
+	int ret;
+
+	if (!pi->need_update_smu7_dpm_table)
+		return 0;
+
+	if (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_SCLK)
+		dpm_table->sclk_table.dpm_levels[dpm_table->sclk_table.count-1].value = sclk;
+
+	if (pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)
+		dpm_table->mclk_table.dpm_levels[dpm_table->mclk_table.count-1].value = mclk;
+
+	if (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_SCLK | DPMTABLE_UPDATE_SCLK)) {
+		ret = ci_populate_all_graphic_levels(rdev);
+		if (ret)
+			return ret;
+	}
+
+	if (pi->need_update_smu7_dpm_table & (DPMTABLE_OD_UPDATE_MCLK | DPMTABLE_UPDATE_MCLK)) {
+		ret = ci_populate_all_memory_levels(rdev);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int ci_enable_uvd_dpm(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	const struct radeon_clock_and_voltage_limits *max_limits;
+	int i;
+
+	if (rdev->pm.dpm.ac_power)
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+	else
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
+
+	if (enable) {
+		pi->dpm_level_enable_mask.uvd_dpm_enable_mask = 0;
+
+		for (i = rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count - 1; i >= 0; i--) {
+			if (rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) {
+				pi->dpm_level_enable_mask.uvd_dpm_enable_mask |= 1 << i;
+
+				if (!pi->caps_uvd_dpm)
+					break;
+			}
+		}
+
+		ci_send_msg_to_smc_with_parameter(rdev,
+						  PPSMC_MSG_UVDDPM_SetEnabledMask,
+						  pi->dpm_level_enable_mask.uvd_dpm_enable_mask);
+
+		if (pi->last_mclk_dpm_enable_mask & 0x1) {
+			pi->uvd_enabled = true;
+			pi->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE;
+			ci_send_msg_to_smc_with_parameter(rdev,
+							  PPSMC_MSG_MCLKDPM_SetEnabledMask,
+							  pi->dpm_level_enable_mask.mclk_dpm_enable_mask);
+		}
+	} else {
+		if (pi->last_mclk_dpm_enable_mask & 0x1) {
+			pi->uvd_enabled = false;
+			pi->dpm_level_enable_mask.mclk_dpm_enable_mask |= 1;
+			ci_send_msg_to_smc_with_parameter(rdev,
+							  PPSMC_MSG_MCLKDPM_SetEnabledMask,
+							  pi->dpm_level_enable_mask.mclk_dpm_enable_mask);
+		}
+	}
+
+	return (ci_send_msg_to_smc(rdev, enable ?
+				   PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable) == PPSMC_Result_OK) ?
+		0 : -EINVAL;
+}
+
+#if 0
+static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	const struct radeon_clock_and_voltage_limits *max_limits;
+	int i;
+
+	if (rdev->pm.dpm.ac_power)
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+	else
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
+
+	if (enable) {
+		pi->dpm_level_enable_mask.vce_dpm_enable_mask = 0;
+		for (i = rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count - 1; i >= 0; i--) {
+			if (rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) {
+				pi->dpm_level_enable_mask.vce_dpm_enable_mask |= 1 << i;
+
+				if (!pi->caps_vce_dpm)
+					break;
+			}
+		}
+
+		ci_send_msg_to_smc_with_parameter(rdev,
+						  PPSMC_MSG_VCEDPM_SetEnabledMask,
+						  pi->dpm_level_enable_mask.vce_dpm_enable_mask);
+	}
+
+	return (ci_send_msg_to_smc(rdev, enable ?
+				   PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable) == PPSMC_Result_OK) ?
+		0 : -EINVAL;
+}
+
+static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	const struct radeon_clock_and_voltage_limits *max_limits;
+	int i;
+
+	if (rdev->pm.dpm.ac_power)
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+	else
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
+
+	if (enable) {
+		pi->dpm_level_enable_mask.samu_dpm_enable_mask = 0;
+		for (i = rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count - 1; i >= 0; i--) {
+			if (rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) {
+				pi->dpm_level_enable_mask.samu_dpm_enable_mask |= 1 << i;
+
+				if (!pi->caps_samu_dpm)
+					break;
+			}
+		}
+
+		ci_send_msg_to_smc_with_parameter(rdev,
+						  PPSMC_MSG_SAMUDPM_SetEnabledMask,
+						  pi->dpm_level_enable_mask.samu_dpm_enable_mask);
+	}
+	return (ci_send_msg_to_smc(rdev, enable ?
+				   PPSMC_MSG_SAMUDPM_Enable : PPSMC_MSG_SAMUDPM_Disable) == PPSMC_Result_OK) ?
+		0 : -EINVAL;
+}
+
+static int ci_enable_acp_dpm(struct radeon_device *rdev, bool enable)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	const struct radeon_clock_and_voltage_limits *max_limits;
+	int i;
+
+	if (rdev->pm.dpm.ac_power)
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+	else
+		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc;
+
+	if (enable) {
+		pi->dpm_level_enable_mask.acp_dpm_enable_mask = 0;
+		for (i = rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count - 1; i >= 0; i--) {
+			if (rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[i].v <= max_limits->vddc) {
+				pi->dpm_level_enable_mask.acp_dpm_enable_mask |= 1 << i;
+
+				if (!pi->caps_acp_dpm)
+					break;
+			}
+		}
+
+		ci_send_msg_to_smc_with_parameter(rdev,
+						  PPSMC_MSG_ACPDPM_SetEnabledMask,
+						  pi->dpm_level_enable_mask.acp_dpm_enable_mask);
+	}
+
+	return (ci_send_msg_to_smc(rdev, enable ?
+				   PPSMC_MSG_ACPDPM_Enable : PPSMC_MSG_ACPDPM_Disable) == PPSMC_Result_OK) ?
+		0 : -EINVAL;
+}
+#endif
+
+static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 tmp;
+
+	if (!gate) {
+		if (pi->caps_uvd_dpm ||
+		    (rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count <= 0))
+			pi->smc_state_table.UvdBootLevel = 0;
+		else
+			pi->smc_state_table.UvdBootLevel =
+				rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count - 1;
+
+		tmp = RREG32_SMC(DPM_TABLE_475);
+		tmp &= ~UvdBootLevel_MASK;
+		tmp |= UvdBootLevel(pi->smc_state_table.UvdBootLevel);
+		WREG32_SMC(DPM_TABLE_475, tmp);
+	}
+
+	return ci_enable_uvd_dpm(rdev, !gate);
+}
+
+#if 0
+static u8 ci_get_vce_boot_level(struct radeon_device *rdev)
+{
+	u8 i;
+	u32 min_evclk = 30000; /* ??? */
+	struct radeon_vce_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
+
+	for (i = 0; i < table->count; i++) {
+		if (table->entries[i].evclk >= min_evclk)
+			return i;
+	}
+
+	return table->count - 1;
+}
+
+static int ci_update_vce_dpm(struct radeon_device *rdev,
+			     struct radeon_ps *radeon_new_state,
+			     struct radeon_ps *radeon_current_state)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0);
+	bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0);
+	int ret = 0;
+	u32 tmp;
+
+	if (new_vce_clock_non_zero != old_vce_clock_non_zero) {
+		if (new_vce_clock_non_zero) {
+			pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev);
+
+			tmp = RREG32_SMC(DPM_TABLE_475);
+			tmp &= ~VceBootLevel_MASK;
+			tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel);
+			WREG32_SMC(DPM_TABLE_475, tmp);
+
+			ret = ci_enable_vce_dpm(rdev, true);
+		} else {
+			ret = ci_enable_vce_dpm(rdev, false);
+		}
+	}
+	return ret;
+}
+
+static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate)
+{
+	return ci_enable_samu_dpm(rdev, gate);
+}
+
+static int ci_update_acp_dpm(struct radeon_device *rdev, bool gate)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 tmp;
+
+	if (!gate) {
+		pi->smc_state_table.AcpBootLevel = 0;
+
+		tmp = RREG32_SMC(DPM_TABLE_475);
+		tmp &= ~AcpBootLevel_MASK;
+		tmp |= AcpBootLevel(pi->smc_state_table.AcpBootLevel);
+		WREG32_SMC(DPM_TABLE_475, tmp);
+	}
+
+	return ci_enable_acp_dpm(rdev, !gate);
+}
+#endif
+
+static int ci_generate_dpm_level_enable_mask(struct radeon_device *rdev,
+					     struct radeon_ps *radeon_state)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	int ret;
+
+	ret = ci_trim_dpm_states(rdev, radeon_state);
+	if (ret)
+		return ret;
+
+	pi->dpm_level_enable_mask.sclk_dpm_enable_mask =
+		ci_get_dpm_level_enable_mask_value(&pi->dpm_table.sclk_table);
+	pi->dpm_level_enable_mask.mclk_dpm_enable_mask =
+		ci_get_dpm_level_enable_mask_value(&pi->dpm_table.mclk_table);
+	pi->last_mclk_dpm_enable_mask =
+		pi->dpm_level_enable_mask.mclk_dpm_enable_mask;
+	if (pi->uvd_enabled) {
+		if (pi->dpm_level_enable_mask.mclk_dpm_enable_mask & 1)
+			pi->dpm_level_enable_mask.mclk_dpm_enable_mask &= 0xFFFFFFFE;
+	}
+	pi->dpm_level_enable_mask.pcie_dpm_enable_mask =
+		ci_get_dpm_level_enable_mask_value(&pi->dpm_table.pcie_speed_table);
+
+	return 0;
+}
+
+static u32 ci_get_lowest_enabled_level(struct radeon_device *rdev,
+				       u32 level_mask)
+{
+	u32 level = 0;
+
+	while ((level_mask & (1 << level)) == 0)
+		level++;
+
+	return level;
+}
+
+
+int ci_dpm_force_performance_level(struct radeon_device *rdev,
+				   enum radeon_dpm_forced_level level)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	PPSMC_Result smc_result;
+	u32 tmp, levels, i;
+	int ret;
+
+	if (level == RADEON_DPM_FORCED_LEVEL_HIGH) {
+		if ((!pi->sclk_dpm_key_disabled) &&
+		    pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
+			levels = 0;
+			tmp = pi->dpm_level_enable_mask.sclk_dpm_enable_mask;
+			while (tmp >>= 1)
+				levels++;
+			if (levels) {
+				ret = ci_dpm_force_state_sclk(rdev, levels);
+				if (ret)
+					return ret;
+				for (i = 0; i < rdev->usec_timeout; i++) {
+					tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) &
+					       CURR_SCLK_INDEX_MASK) >> CURR_SCLK_INDEX_SHIFT;
+					if (tmp == levels)
+						break;
+					udelay(1);
+				}
+			}
+		}
+		if ((!pi->mclk_dpm_key_disabled) &&
+		    pi->dpm_level_enable_mask.mclk_dpm_enable_mask) {
+			levels = 0;
+			tmp = pi->dpm_level_enable_mask.mclk_dpm_enable_mask;
+			while (tmp >>= 1)
+				levels++;
+			if (levels) {
+				ret = ci_dpm_force_state_mclk(rdev, levels);
+				if (ret)
+					return ret;
+				for (i = 0; i < rdev->usec_timeout; i++) {
+					tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) &
+					       CURR_MCLK_INDEX_MASK) >> CURR_MCLK_INDEX_SHIFT;
+					if (tmp == levels)
+						break;
+					udelay(1);
+				}
+			}
+		}
+		if ((!pi->pcie_dpm_key_disabled) &&
+		    pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
+			levels = 0;
+			tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
+			while (tmp >>= 1)
+				levels++;
+			if (levels) {
+				ret = ci_dpm_force_state_pcie(rdev, level);
+				if (ret)
+					return ret;
+				for (i = 0; i < rdev->usec_timeout; i++) {
+					tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX_1) &
+					       CURR_PCIE_INDEX_MASK) >> CURR_PCIE_INDEX_SHIFT;
+					if (tmp == levels)
+						break;
+					udelay(1);
+				}
+			}
+		}
+	} else if (level == RADEON_DPM_FORCED_LEVEL_LOW) {
+		if ((!pi->sclk_dpm_key_disabled) &&
+		    pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
+			levels = ci_get_lowest_enabled_level(rdev,
+							     pi->dpm_level_enable_mask.sclk_dpm_enable_mask);
+			ret = ci_dpm_force_state_sclk(rdev, levels);
+			if (ret)
+				return ret;
+			for (i = 0; i < rdev->usec_timeout; i++) {
+				tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) &
+				       CURR_SCLK_INDEX_MASK) >> CURR_SCLK_INDEX_SHIFT;
+				if (tmp == levels)
+					break;
+				udelay(1);
+			}
+		}
+		if ((!pi->mclk_dpm_key_disabled) &&
+		    pi->dpm_level_enable_mask.mclk_dpm_enable_mask) {
+			levels = ci_get_lowest_enabled_level(rdev,
+							     pi->dpm_level_enable_mask.mclk_dpm_enable_mask);
+			ret = ci_dpm_force_state_mclk(rdev, levels);
+			if (ret)
+				return ret;
+			for (i = 0; i < rdev->usec_timeout; i++) {
+				tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) &
+				       CURR_MCLK_INDEX_MASK) >> CURR_MCLK_INDEX_SHIFT;
+				if (tmp == levels)
+					break;
+				udelay(1);
+			}
+		}
+		if ((!pi->pcie_dpm_key_disabled) &&
+		    pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
+			levels = ci_get_lowest_enabled_level(rdev,
+							     pi->dpm_level_enable_mask.pcie_dpm_enable_mask);
+			ret = ci_dpm_force_state_pcie(rdev, levels);
+			if (ret)
+				return ret;
+			for (i = 0; i < rdev->usec_timeout; i++) {
+				tmp = (RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX_1) &
+				       CURR_PCIE_INDEX_MASK) >> CURR_PCIE_INDEX_SHIFT;
+				if (tmp == levels)
+					break;
+				udelay(1);
+			}
+		}
+	} else if (level == RADEON_DPM_FORCED_LEVEL_AUTO) {
+		if (!pi->sclk_dpm_key_disabled) {
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_NoForcedLevel);
+			if (smc_result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+		if (!pi->mclk_dpm_key_disabled) {
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_MCLKDPM_NoForcedLevel);
+			if (smc_result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+		if (!pi->pcie_dpm_key_disabled) {
+			smc_result = ci_send_msg_to_smc(rdev, PPSMC_MSG_PCIeDPM_UnForceLevel);
+			if (smc_result != PPSMC_Result_OK)
+				return -EINVAL;
+		}
+	}
+
+	rdev->pm.dpm.forced_level = level;
+
+	return 0;
+}
+
+static int ci_set_mc_special_registers(struct radeon_device *rdev,
+				       struct ci_mc_reg_table *table)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u8 i, j, k;
+	u32 temp_reg;
+
+	for (i = 0, j = table->last; i < table->last; i++) {
+		if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+			return -EINVAL;
+		switch(table->mc_reg_address[i].s1 << 2) {
+		case MC_SEQ_MISC1:
+			temp_reg = RREG32(MC_PMG_CMD_EMRS);
+			table->mc_reg_address[j].s1 = MC_PMG_CMD_EMRS >> 2;
+			table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_EMRS_LP >> 2;
+			for (k = 0; k < table->num_entries; k++) {
+				table->mc_reg_table_entry[k].mc_data[j] =
+					((temp_reg & 0xffff0000)) | ((table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16);
+			}
+			j++;
+			if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+				return -EINVAL;
+
+			temp_reg = RREG32(MC_PMG_CMD_MRS);
+			table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS >> 2;
+			table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS_LP >> 2;
+			for (k = 0; k < table->num_entries; k++) {
+				table->mc_reg_table_entry[k].mc_data[j] =
+					(temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
+				if (!pi->mem_gddr5)
+					table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
+			}
+			j++;
+			if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+				return -EINVAL;
+
+			if (!pi->mem_gddr5) {
+				table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD >> 2;
+				table->mc_reg_address[j].s0 = MC_PMG_AUTO_CMD >> 2;
+				for (k = 0; k < table->num_entries; k++) {
+					table->mc_reg_table_entry[k].mc_data[j] =
+						(table->mc_reg_table_entry[k].mc_data[i] & 0xffff0000) >> 16;
+				}
+				j++;
+				if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+					return -EINVAL;
+			}
+			break;
+		case MC_SEQ_RESERVE_M:
+			temp_reg = RREG32(MC_PMG_CMD_MRS1);
+			table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS1 >> 2;
+			table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
+			for (k = 0; k < table->num_entries; k++) {
+				table->mc_reg_table_entry[k].mc_data[j] =
+					(temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
+			}
+			j++;
+			if (j > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+				return -EINVAL;
+			break;
+		default:
+			break;
+		}
+
+	}
+
+	table->last = j;
+
+	return 0;
+}
+
+static bool ci_check_s0_mc_reg_index(u16 in_reg, u16 *out_reg)
+{
+	bool result = true;
+
+	switch(in_reg) {
+	case MC_SEQ_RAS_TIMING >> 2:
+		*out_reg = MC_SEQ_RAS_TIMING_LP >> 2;
+		break;
+	case MC_SEQ_DLL_STBY >> 2:
+		*out_reg = MC_SEQ_DLL_STBY_LP >> 2;
+		break;
+	case MC_SEQ_G5PDX_CMD0 >> 2:
+		*out_reg = MC_SEQ_G5PDX_CMD0_LP >> 2;
+		break;
+	case MC_SEQ_G5PDX_CMD1 >> 2:
+		*out_reg = MC_SEQ_G5PDX_CMD1_LP >> 2;
+		break;
+	case MC_SEQ_G5PDX_CTRL >> 2:
+		*out_reg = MC_SEQ_G5PDX_CTRL_LP >> 2;
+		break;
+	case MC_SEQ_CAS_TIMING >> 2:
+		*out_reg = MC_SEQ_CAS_TIMING_LP >> 2;
+            break;
+	case MC_SEQ_MISC_TIMING >> 2:
+		*out_reg = MC_SEQ_MISC_TIMING_LP >> 2;
+		break;
+	case MC_SEQ_MISC_TIMING2 >> 2:
+		*out_reg = MC_SEQ_MISC_TIMING2_LP >> 2;
+		break;
+	case MC_SEQ_PMG_DVS_CMD >> 2:
+		*out_reg = MC_SEQ_PMG_DVS_CMD_LP >> 2;
+		break;
+	case MC_SEQ_PMG_DVS_CTL >> 2:
+		*out_reg = MC_SEQ_PMG_DVS_CTL_LP >> 2;
+		break;
+	case MC_SEQ_RD_CTL_D0 >> 2:
+		*out_reg = MC_SEQ_RD_CTL_D0_LP >> 2;
+		break;
+	case MC_SEQ_RD_CTL_D1 >> 2:
+		*out_reg = MC_SEQ_RD_CTL_D1_LP >> 2;
+		break;
+	case MC_SEQ_WR_CTL_D0 >> 2:
+		*out_reg = MC_SEQ_WR_CTL_D0_LP >> 2;
+		break;
+	case MC_SEQ_WR_CTL_D1 >> 2:
+		*out_reg = MC_SEQ_WR_CTL_D1_LP >> 2;
+		break;
+	case MC_PMG_CMD_EMRS >> 2:
+		*out_reg = MC_SEQ_PMG_CMD_EMRS_LP >> 2;
+		break;
+	case MC_PMG_CMD_MRS >> 2:
+		*out_reg = MC_SEQ_PMG_CMD_MRS_LP >> 2;
+		break;
+	case MC_PMG_CMD_MRS1 >> 2:
+		*out_reg = MC_SEQ_PMG_CMD_MRS1_LP >> 2;
+		break;
+	case MC_SEQ_PMG_TIMING >> 2:
+		*out_reg = MC_SEQ_PMG_TIMING_LP >> 2;
+		break;
+	case MC_PMG_CMD_MRS2 >> 2:
+		*out_reg = MC_SEQ_PMG_CMD_MRS2_LP >> 2;
+		break;
+	case MC_SEQ_WR_CTL_2 >> 2:
+		*out_reg = MC_SEQ_WR_CTL_2_LP >> 2;
+		break;
+	default:
+		result = false;
+		break;
+	}
+
+	return result;
+}
+
+static void ci_set_valid_flag(struct ci_mc_reg_table *table)
+{
+	u8 i, j;
+
+	for (i = 0; i < table->last; i++) {
+		for (j = 1; j < table->num_entries; j++) {
+			if (table->mc_reg_table_entry[j-1].mc_data[i] !=
+			    table->mc_reg_table_entry[j].mc_data[i]) {
+				table->valid_flag |= 1 << i;
+				break;
+			}
+		}
+	}
+}
+
+static void ci_set_s0_mc_reg_index(struct ci_mc_reg_table *table)
+{
+	u32 i;
+	u16 address;
+
+	for (i = 0; i < table->last; i++) {
+		table->mc_reg_address[i].s0 =
+			ci_check_s0_mc_reg_index(table->mc_reg_address[i].s1, &address) ?
+			address : table->mc_reg_address[i].s1;
+	}
+}
+
+static int ci_copy_vbios_mc_reg_table(const struct atom_mc_reg_table *table,
+				      struct ci_mc_reg_table *ci_table)
+{
+	u8 i, j;
+
+	if (table->last > SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+		return -EINVAL;
+	if (table->num_entries > MAX_AC_TIMING_ENTRIES)
+		return -EINVAL;
+
+	for (i = 0; i < table->last; i++)
+		ci_table->mc_reg_address[i].s1 = table->mc_reg_address[i].s1;
+
+	ci_table->last = table->last;
+
+	for (i = 0; i < table->num_entries; i++) {
+		ci_table->mc_reg_table_entry[i].mclk_max =
+			table->mc_reg_table_entry[i].mclk_max;
+		for (j = 0; j < table->last; j++)
+			ci_table->mc_reg_table_entry[i].mc_data[j] =
+				table->mc_reg_table_entry[i].mc_data[j];
+	}
+	ci_table->num_entries = table->num_entries;
+
+	return 0;
+}
+
+static int ci_initialize_mc_reg_table(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct atom_mc_reg_table *table;
+	struct ci_mc_reg_table *ci_table = &pi->mc_reg_table;
+	u8 module_index = rv770_get_memory_module_index(rdev);
+	int ret;
+
+	table = kzalloc(sizeof(struct atom_mc_reg_table), GFP_KERNEL);
+	if (!table)
+		return -ENOMEM;
+
+	WREG32(MC_SEQ_RAS_TIMING_LP, RREG32(MC_SEQ_RAS_TIMING));
+	WREG32(MC_SEQ_CAS_TIMING_LP, RREG32(MC_SEQ_CAS_TIMING));
+	WREG32(MC_SEQ_DLL_STBY_LP, RREG32(MC_SEQ_DLL_STBY));
+	WREG32(MC_SEQ_G5PDX_CMD0_LP, RREG32(MC_SEQ_G5PDX_CMD0));
+	WREG32(MC_SEQ_G5PDX_CMD1_LP, RREG32(MC_SEQ_G5PDX_CMD1));
+	WREG32(MC_SEQ_G5PDX_CTRL_LP, RREG32(MC_SEQ_G5PDX_CTRL));
+	WREG32(MC_SEQ_PMG_DVS_CMD_LP, RREG32(MC_SEQ_PMG_DVS_CMD));
+	WREG32(MC_SEQ_PMG_DVS_CTL_LP, RREG32(MC_SEQ_PMG_DVS_CTL));
+	WREG32(MC_SEQ_MISC_TIMING_LP, RREG32(MC_SEQ_MISC_TIMING));
+	WREG32(MC_SEQ_MISC_TIMING2_LP, RREG32(MC_SEQ_MISC_TIMING2));
+	WREG32(MC_SEQ_PMG_CMD_EMRS_LP, RREG32(MC_PMG_CMD_EMRS));
+	WREG32(MC_SEQ_PMG_CMD_MRS_LP, RREG32(MC_PMG_CMD_MRS));
+	WREG32(MC_SEQ_PMG_CMD_MRS1_LP, RREG32(MC_PMG_CMD_MRS1));
+	WREG32(MC_SEQ_WR_CTL_D0_LP, RREG32(MC_SEQ_WR_CTL_D0));
+	WREG32(MC_SEQ_WR_CTL_D1_LP, RREG32(MC_SEQ_WR_CTL_D1));
+	WREG32(MC_SEQ_RD_CTL_D0_LP, RREG32(MC_SEQ_RD_CTL_D0));
+	WREG32(MC_SEQ_RD_CTL_D1_LP, RREG32(MC_SEQ_RD_CTL_D1));
+	WREG32(MC_SEQ_PMG_TIMING_LP, RREG32(MC_SEQ_PMG_TIMING));
+	WREG32(MC_SEQ_PMG_CMD_MRS2_LP, RREG32(MC_PMG_CMD_MRS2));
+	WREG32(MC_SEQ_WR_CTL_2_LP, RREG32(MC_SEQ_WR_CTL_2));
+
+	ret = radeon_atom_init_mc_reg_table(rdev, module_index, table);
+	if (ret)
+		goto init_mc_done;
+
+        ret = ci_copy_vbios_mc_reg_table(table, ci_table);
+	if (ret)
+		goto init_mc_done;
+
+	ci_set_s0_mc_reg_index(ci_table);
+
+	ret = ci_set_mc_special_registers(rdev, ci_table);
+	if (ret)
+		goto init_mc_done;
+
+	ci_set_valid_flag(ci_table);
+
+init_mc_done:
+	kfree(table);
+
+	return ret;
+}
+
+static int ci_populate_mc_reg_addresses(struct radeon_device *rdev,
+					SMU7_Discrete_MCRegisters *mc_reg_table)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 i, j;
+
+	for (i = 0, j = 0; j < pi->mc_reg_table.last; j++) {
+		if (pi->mc_reg_table.valid_flag & (1 << j)) {
+			if (i >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
+				return -EINVAL;
+			mc_reg_table->address[i].s0 = cpu_to_be16(pi->mc_reg_table.mc_reg_address[j].s0);
+			mc_reg_table->address[i].s1 = cpu_to_be16(pi->mc_reg_table.mc_reg_address[j].s1);
+			i++;
+		}
+	}
+
+	mc_reg_table->last = (u8)i;
+
+	return 0;
+}
+
+static void ci_convert_mc_registers(const struct ci_mc_reg_entry *entry,
+				    SMU7_Discrete_MCRegisterSet *data,
+				    u32 num_entries, u32 valid_flag)
+{
+	u32 i, j;
+
+	for (i = 0, j = 0; j < num_entries; j++) {
+		if (valid_flag & (1 << j)) {
+			data->value[i] = cpu_to_be32(entry->mc_data[j]);
+			i++;
+		}
+	}
+}
+
+static void ci_convert_mc_reg_table_entry_to_smc(struct radeon_device *rdev,
+						 const u32 memory_clock,
+						 SMU7_Discrete_MCRegisterSet *mc_reg_table_data)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 i = 0;
+
+	for(i = 0; i < pi->mc_reg_table.num_entries; i++) {
+		if (memory_clock <= pi->mc_reg_table.mc_reg_table_entry[i].mclk_max)
+			break;
+	}
+
+	if ((i == pi->mc_reg_table.num_entries) && (i > 0))
+		--i;
+
+	ci_convert_mc_registers(&pi->mc_reg_table.mc_reg_table_entry[i],
+				mc_reg_table_data, pi->mc_reg_table.last,
+				pi->mc_reg_table.valid_flag);
+}
+
+static void ci_convert_mc_reg_table_to_smc(struct radeon_device *rdev,
+					   SMU7_Discrete_MCRegisters *mc_reg_table)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 i;
+
+	for (i = 0; i < pi->dpm_table.mclk_table.count; i++)
+		ci_convert_mc_reg_table_entry_to_smc(rdev,
+						     pi->dpm_table.mclk_table.dpm_levels[i].value,
+						     &mc_reg_table->data[i]);
+}
+
+static int ci_populate_initial_mc_reg_table(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	int ret;
+
+	memset(&pi->smc_mc_reg_table, 0, sizeof(SMU7_Discrete_MCRegisters));
+
+	ret = ci_populate_mc_reg_addresses(rdev, &pi->smc_mc_reg_table);
+	if (ret)
+		return ret;
+	ci_convert_mc_reg_table_to_smc(rdev, &pi->smc_mc_reg_table);
+
+	return ci_copy_bytes_to_smc(rdev,
+				    pi->mc_reg_table_start,
+				    (u8 *)&pi->smc_mc_reg_table,
+				    sizeof(SMU7_Discrete_MCRegisters),
+				    pi->sram_end);
+}
+
+static int ci_update_and_upload_mc_reg_table(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	if (!(pi->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK))
+		return 0;
+
+	memset(&pi->smc_mc_reg_table, 0, sizeof(SMU7_Discrete_MCRegisters));
+
+	ci_convert_mc_reg_table_to_smc(rdev, &pi->smc_mc_reg_table);
+
+	return ci_copy_bytes_to_smc(rdev,
+				    pi->mc_reg_table_start +
+				    offsetof(SMU7_Discrete_MCRegisters, data[0]),
+				    (u8 *)&pi->smc_mc_reg_table.data[0],
+				    sizeof(SMU7_Discrete_MCRegisterSet) *
+				    pi->dpm_table.mclk_table.count,
+				    pi->sram_end);
+}
+
+static void ci_enable_voltage_control(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(GENERAL_PWRMGT);
+
+	tmp |= VOLT_PWRMGT_EN;
+	WREG32_SMC(GENERAL_PWRMGT, tmp);
+}
+
+static enum radeon_pcie_gen ci_get_maximum_link_speed(struct radeon_device *rdev,
+						      struct radeon_ps *radeon_state)
+{
+	struct ci_ps *state = ci_get_ps(radeon_state);
+	int i;
+	u16 pcie_speed, max_speed = 0;
+
+	for (i = 0; i < state->performance_level_count; i++) {
+		pcie_speed = state->performance_levels[i].pcie_gen;
+		if (max_speed < pcie_speed)
+			max_speed = pcie_speed;
+	}
+
+	return max_speed;
+}
+
+static u16 ci_get_current_pcie_speed(struct radeon_device *rdev)
+{
+	u32 speed_cntl = 0;
+
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL) & LC_CURRENT_DATA_RATE_MASK;
+	speed_cntl >>= LC_CURRENT_DATA_RATE_SHIFT;
+
+	return (u16)speed_cntl;
+}
+
+static int ci_get_current_pcie_lane_number(struct radeon_device *rdev)
+{
+	u32 link_width = 0;
+
+	link_width = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL) & LC_LINK_WIDTH_RD_MASK;
+	link_width >>= LC_LINK_WIDTH_RD_SHIFT;
+
+	switch (link_width) {
+	case RADEON_PCIE_LC_LINK_WIDTH_X1:
+		return 1;
+	case RADEON_PCIE_LC_LINK_WIDTH_X2:
+		return 2;
+	case RADEON_PCIE_LC_LINK_WIDTH_X4:
+		return 4;
+	case RADEON_PCIE_LC_LINK_WIDTH_X8:
+		return 8;
+	case RADEON_PCIE_LC_LINK_WIDTH_X12:
+		/* not actually supported */
+		return 12;
+	case RADEON_PCIE_LC_LINK_WIDTH_X0:
+	case RADEON_PCIE_LC_LINK_WIDTH_X16:
+	default:
+		return 16;
+	}
+}
+
+static void ci_request_link_speed_change_before_state_change(struct radeon_device *rdev,
+							     struct radeon_ps *radeon_new_state,
+							     struct radeon_ps *radeon_current_state)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	enum radeon_pcie_gen target_link_speed =
+		ci_get_maximum_link_speed(rdev, radeon_new_state);
+	enum radeon_pcie_gen current_link_speed;
+
+	if (pi->force_pcie_gen == RADEON_PCIE_GEN_INVALID)
+		current_link_speed = ci_get_maximum_link_speed(rdev, radeon_current_state);
+	else
+		current_link_speed = pi->force_pcie_gen;
+
+	pi->force_pcie_gen = RADEON_PCIE_GEN_INVALID;
+	pi->pspp_notify_required = false;
+	if (target_link_speed > current_link_speed) {
+		switch (target_link_speed) {
+		case RADEON_PCIE_GEN3:
+			if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN3, false) == 0)
+				break;
+			pi->force_pcie_gen = RADEON_PCIE_GEN2;
+			if (current_link_speed == RADEON_PCIE_GEN2)
+				break;
+		case RADEON_PCIE_GEN2:
+			if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN2, false) == 0)
+				break;
+		default:
+			pi->force_pcie_gen = ci_get_current_pcie_speed(rdev);
+			break;
+		}
+	} else {
+		if (target_link_speed < current_link_speed)
+			pi->pspp_notify_required = true;
+	}
+}
+
+static void ci_notify_link_speed_change_after_state_change(struct radeon_device *rdev,
+							   struct radeon_ps *radeon_new_state,
+							   struct radeon_ps *radeon_current_state)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	enum radeon_pcie_gen target_link_speed =
+		ci_get_maximum_link_speed(rdev, radeon_new_state);
+	u8 request;
+
+	if (pi->pspp_notify_required) {
+		if (target_link_speed == RADEON_PCIE_GEN3)
+			request = PCIE_PERF_REQ_PECI_GEN3;
+		else if (target_link_speed == RADEON_PCIE_GEN2)
+			request = PCIE_PERF_REQ_PECI_GEN2;
+		else
+			request = PCIE_PERF_REQ_PECI_GEN1;
+
+		if ((request == PCIE_PERF_REQ_PECI_GEN1) &&
+		    (ci_get_current_pcie_speed(rdev) > 0))
+			return;
+
+		radeon_acpi_pcie_performance_request(rdev, request, false);
+	}
+}
+
+static int ci_set_private_data_variables_based_on_pptable(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_clock_voltage_dependency_table *allowed_sclk_vddc_table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+	struct radeon_clock_voltage_dependency_table *allowed_mclk_vddc_table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk;
+	struct radeon_clock_voltage_dependency_table *allowed_mclk_vddci_table =
+		&rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk;
+
+	if (allowed_sclk_vddc_table == NULL)
+		return -EINVAL;
+	if (allowed_sclk_vddc_table->count < 1)
+		return -EINVAL;
+	if (allowed_mclk_vddc_table == NULL)
+		return -EINVAL;
+	if (allowed_mclk_vddc_table->count < 1)
+		return -EINVAL;
+	if (allowed_mclk_vddci_table == NULL)
+		return -EINVAL;
+	if (allowed_mclk_vddci_table->count < 1)
+		return -EINVAL;
+
+	pi->min_vddc_in_pp_table = allowed_sclk_vddc_table->entries[0].v;
+	pi->max_vddc_in_pp_table =
+		allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v;
+
+	pi->min_vddci_in_pp_table = allowed_mclk_vddci_table->entries[0].v;
+	pi->max_vddci_in_pp_table =
+		allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v;
+
+	rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk =
+		allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk;
+	rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.mclk =
+		allowed_mclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].clk;
+	rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddc =
+		allowed_sclk_vddc_table->entries[allowed_sclk_vddc_table->count - 1].v;
+        rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.vddci =
+		allowed_mclk_vddci_table->entries[allowed_mclk_vddci_table->count - 1].v;
+
+	return 0;
+}
+
+static void ci_patch_with_vddc_leakage(struct radeon_device *rdev, u16 *vddc)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_leakage_voltage *leakage_table = &pi->vddc_leakage;
+	u32 leakage_index;
+
+	for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) {
+		if (leakage_table->leakage_id[leakage_index] == *vddc) {
+			*vddc = leakage_table->actual_voltage[leakage_index];
+			break;
+		}
+	}
+}
+
+static void ci_patch_with_vddci_leakage(struct radeon_device *rdev, u16 *vddci)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_leakage_voltage *leakage_table = &pi->vddci_leakage;
+	u32 leakage_index;
+
+	for (leakage_index = 0; leakage_index < leakage_table->count; leakage_index++) {
+		if (leakage_table->leakage_id[leakage_index] == *vddci) {
+			*vddci = leakage_table->actual_voltage[leakage_index];
+			break;
+		}
+	}
+}
+
+static void ci_patch_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev,
+								      struct radeon_clock_voltage_dependency_table *table)
+{
+	u32 i;
+
+	if (table) {
+		for (i = 0; i < table->count; i++)
+			ci_patch_with_vddc_leakage(rdev, &table->entries[i].v);
+	}
+}
+
+static void ci_patch_clock_voltage_dependency_table_with_vddci_leakage(struct radeon_device *rdev,
+								       struct radeon_clock_voltage_dependency_table *table)
+{
+	u32 i;
+
+	if (table) {
+		for (i = 0; i < table->count; i++)
+			ci_patch_with_vddci_leakage(rdev, &table->entries[i].v);
+	}
+}
+
+static void ci_patch_vce_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev,
+									  struct radeon_vce_clock_voltage_dependency_table *table)
+{
+	u32 i;
+
+	if (table) {
+		for (i = 0; i < table->count; i++)
+			ci_patch_with_vddc_leakage(rdev, &table->entries[i].v);
+	}
+}
+
+static void ci_patch_uvd_clock_voltage_dependency_table_with_vddc_leakage(struct radeon_device *rdev,
+									  struct radeon_uvd_clock_voltage_dependency_table *table)
+{
+	u32 i;
+
+	if (table) {
+		for (i = 0; i < table->count; i++)
+			ci_patch_with_vddc_leakage(rdev, &table->entries[i].v);
+	}
+}
+
+static void ci_patch_vddc_phase_shed_limit_table_with_vddc_leakage(struct radeon_device *rdev,
+								   struct radeon_phase_shedding_limits_table *table)
+{
+	u32 i;
+
+	if (table) {
+		for (i = 0; i < table->count; i++)
+			ci_patch_with_vddc_leakage(rdev, &table->entries[i].voltage);
+	}
+}
+
+static void ci_patch_clock_voltage_limits_with_vddc_leakage(struct radeon_device *rdev,
+							    struct radeon_clock_and_voltage_limits *table)
+{
+	if (table) {
+		ci_patch_with_vddc_leakage(rdev, (u16 *)&table->vddc);
+		ci_patch_with_vddci_leakage(rdev, (u16 *)&table->vddci);
+	}
+}
+
+static void ci_patch_cac_leakage_table_with_vddc_leakage(struct radeon_device *rdev,
+							 struct radeon_cac_leakage_table *table)
+{
+	u32 i;
+
+	if (table) {
+		for (i = 0; i < table->count; i++)
+			ci_patch_with_vddc_leakage(rdev, &table->entries[i].vddc);
+	}
+}
+
+static void ci_patch_dependency_tables_with_leakage(struct radeon_device *rdev)
+{
+
+	ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev,
+								  &rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk);
+	ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev,
+								  &rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk);
+	ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev,
+								  &rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk);
+	ci_patch_clock_voltage_dependency_table_with_vddci_leakage(rdev,
+								   &rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk);
+	ci_patch_vce_clock_voltage_dependency_table_with_vddc_leakage(rdev,
+								      &rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table);
+	ci_patch_uvd_clock_voltage_dependency_table_with_vddc_leakage(rdev,
+								      &rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table);
+	ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev,
+								  &rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table);
+	ci_patch_clock_voltage_dependency_table_with_vddc_leakage(rdev,
+								  &rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table);
+	ci_patch_vddc_phase_shed_limit_table_with_vddc_leakage(rdev,
+							       &rdev->pm.dpm.dyn_state.phase_shedding_limits_table);
+	ci_patch_clock_voltage_limits_with_vddc_leakage(rdev,
+							&rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac);
+	ci_patch_clock_voltage_limits_with_vddc_leakage(rdev,
+							&rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc);
+	ci_patch_cac_leakage_table_with_vddc_leakage(rdev,
+						     &rdev->pm.dpm.dyn_state.cac_leakage_table);
+
+}
+
+static void ci_get_memory_type(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	u32 tmp;
+
+	tmp = RREG32(MC_SEQ_MISC0);
+
+	if (((tmp & MC_SEQ_MISC0_GDDR5_MASK) >> MC_SEQ_MISC0_GDDR5_SHIFT) ==
+	    MC_SEQ_MISC0_GDDR5_VALUE)
+		pi->mem_gddr5 = true;
+	else
+		pi->mem_gddr5 = false;
+
+}
+
+void ci_update_current_ps(struct radeon_device *rdev,
+			  struct radeon_ps *rps)
+{
+	struct ci_ps *new_ps = ci_get_ps(rps);
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	pi->current_rps = *rps;
+	pi->current_ps = *new_ps;
+	pi->current_rps.ps_priv = &pi->current_ps;
+}
+
+void ci_update_requested_ps(struct radeon_device *rdev,
+			    struct radeon_ps *rps)
+{
+	struct ci_ps *new_ps = ci_get_ps(rps);
+	struct ci_power_info *pi = ci_get_pi(rdev);
+
+	pi->requested_rps = *rps;
+	pi->requested_ps = *new_ps;
+	pi->requested_rps.ps_priv = &pi->requested_ps;
+}
+
+int ci_dpm_pre_set_power_state(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_ps requested_ps = *rdev->pm.dpm.requested_ps;
+	struct radeon_ps *new_ps = &requested_ps;
+
+	ci_update_requested_ps(rdev, new_ps);
+
+	ci_apply_state_adjust_rules(rdev, &pi->requested_rps);
+
+	return 0;
+}
+
+void ci_dpm_post_set_power_state(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_ps *new_ps = &pi->requested_rps;
+
+	ci_update_current_ps(rdev, new_ps);
+}
+
+
+void ci_dpm_setup_asic(struct radeon_device *rdev)
+{
+	ci_read_clock_registers(rdev);
+	ci_get_memory_type(rdev);
+	ci_enable_acpi_power_management(rdev);
+	ci_init_sclk_t(rdev);
+}
+
+int ci_dpm_enable(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps;
+	int ret;
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_MC |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_UVD |
+			     RADEON_CG_BLOCK_HDP), false);
+
+	if (ci_is_smc_running(rdev))
+		return -EINVAL;
+	if (pi->voltage_control != CISLANDS_VOLTAGE_CONTROL_NONE) {
+		ci_enable_voltage_control(rdev);
+		ret = ci_construct_voltage_tables(rdev);
+		if (ret) {
+			DRM_ERROR("ci_construct_voltage_tables failed\n");
+			return ret;
+		}
+	}
+	if (pi->caps_dynamic_ac_timing) {
+		ret = ci_initialize_mc_reg_table(rdev);
+		if (ret)
+			pi->caps_dynamic_ac_timing = false;
+	}
+	if (pi->dynamic_ss)
+		ci_enable_spread_spectrum(rdev, true);
+	if (pi->thermal_protection)
+		ci_enable_thermal_protection(rdev, true);
+	ci_program_sstp(rdev);
+	ci_enable_display_gap(rdev);
+	ci_program_vc(rdev);
+	ret = ci_upload_firmware(rdev);
+	if (ret) {
+		DRM_ERROR("ci_upload_firmware failed\n");
+		return ret;
+	}
+	ret = ci_process_firmware_header(rdev);
+	if (ret) {
+		DRM_ERROR("ci_process_firmware_header failed\n");
+		return ret;
+	}
+	ret = ci_initial_switch_from_arb_f0_to_f1(rdev);
+	if (ret) {
+		DRM_ERROR("ci_initial_switch_from_arb_f0_to_f1 failed\n");
+		return ret;
+	}
+	ret = ci_init_smc_table(rdev);
+	if (ret) {
+		DRM_ERROR("ci_init_smc_table failed\n");
+		return ret;
+	}
+	ret = ci_init_arb_table_index(rdev);
+	if (ret) {
+		DRM_ERROR("ci_init_arb_table_index failed\n");
+		return ret;
+	}
+	if (pi->caps_dynamic_ac_timing) {
+		ret = ci_populate_initial_mc_reg_table(rdev);
+		if (ret) {
+			DRM_ERROR("ci_populate_initial_mc_reg_table failed\n");
+			return ret;
+		}
+	}
+	ret = ci_populate_pm_base(rdev);
+	if (ret) {
+		DRM_ERROR("ci_populate_pm_base failed\n");
+		return ret;
+	}
+	ci_dpm_start_smc(rdev);
+	ci_enable_vr_hot_gpio_interrupt(rdev);
+	ret = ci_notify_smc_display_change(rdev, false);
+	if (ret) {
+		DRM_ERROR("ci_notify_smc_display_change failed\n");
+		return ret;
+	}
+	ci_enable_sclk_control(rdev, true);
+	ret = ci_enable_ulv(rdev, true);
+	if (ret) {
+		DRM_ERROR("ci_enable_ulv failed\n");
+		return ret;
+	}
+	ret = ci_enable_ds_master_switch(rdev, true);
+	if (ret) {
+		DRM_ERROR("ci_enable_ds_master_switch failed\n");
+		return ret;
+	}
+	ret = ci_start_dpm(rdev);
+	if (ret) {
+		DRM_ERROR("ci_start_dpm failed\n");
+		return ret;
+	}
+	ret = ci_enable_didt(rdev, true);
+	if (ret) {
+		DRM_ERROR("ci_enable_didt failed\n");
+		return ret;
+	}
+	ret = ci_enable_smc_cac(rdev, true);
+	if (ret) {
+		DRM_ERROR("ci_enable_smc_cac failed\n");
+		return ret;
+	}
+	ret = ci_enable_power_containment(rdev, true);
+	if (ret) {
+		DRM_ERROR("ci_enable_power_containment failed\n");
+		return ret;
+	}
+	if (rdev->irq.installed &&
+	    r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) {
+#if 0
+		PPSMC_Result result;
+#endif
+		ret = ci_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+		if (ret) {
+			DRM_ERROR("ci_set_thermal_temperature_range failed\n");
+			return ret;
+		}
+		rdev->irq.dpm_thermal = true;
+		radeon_irq_set(rdev);
+#if 0
+		result = ci_send_msg_to_smc(rdev, PPSMC_MSG_EnableThermalInterrupt);
+
+		if (result != PPSMC_Result_OK)
+			DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
+#endif
+	}
+
+	ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true);
+
+	ci_dpm_powergate_uvd(rdev, true);
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_MC |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_UVD |
+			     RADEON_CG_BLOCK_HDP), true);
+
+	ci_update_current_ps(rdev, boot_ps);
+
+	return 0;
+}
+
+void ci_dpm_disable(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps;
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_MC |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_UVD |
+			     RADEON_CG_BLOCK_HDP), false);
+
+	ci_dpm_powergate_uvd(rdev, false);
+
+	if (!ci_is_smc_running(rdev))
+		return;
+
+	if (pi->thermal_protection)
+		ci_enable_thermal_protection(rdev, false);
+	ci_enable_power_containment(rdev, false);
+	ci_enable_smc_cac(rdev, false);
+	ci_enable_didt(rdev, false);
+	ci_enable_spread_spectrum(rdev, false);
+	ci_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, false);
+	ci_stop_dpm(rdev);
+	ci_enable_ds_master_switch(rdev, true);
+	ci_enable_ulv(rdev, false);
+	ci_clear_vc(rdev);
+	ci_reset_to_default(rdev);
+	ci_dpm_stop_smc(rdev);
+	ci_force_switch_to_arb_f0(rdev);
+
+	ci_update_current_ps(rdev, boot_ps);
+}
+
+int ci_dpm_set_power_state(struct radeon_device *rdev)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct radeon_ps *new_ps = &pi->requested_rps;
+	struct radeon_ps *old_ps = &pi->current_rps;
+	int ret;
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_MC |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_UVD |
+			     RADEON_CG_BLOCK_HDP), false);
+
+	ci_find_dpm_states_clocks_in_dpm_table(rdev, new_ps);
+	if (pi->pcie_performance_request)
+		ci_request_link_speed_change_before_state_change(rdev, new_ps, old_ps);
+	ret = ci_freeze_sclk_mclk_dpm(rdev);
+	if (ret) {
+		DRM_ERROR("ci_freeze_sclk_mclk_dpm failed\n");
+		return ret;
+	}
+	ret = ci_populate_and_upload_sclk_mclk_dpm_levels(rdev, new_ps);
+	if (ret) {
+		DRM_ERROR("ci_populate_and_upload_sclk_mclk_dpm_levels failed\n");
+		return ret;
+	}
+	ret = ci_generate_dpm_level_enable_mask(rdev, new_ps);
+	if (ret) {
+		DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n");
+		return ret;
+	}
+#if 0
+	ret = ci_update_vce_dpm(rdev, new_ps, old_ps);
+	if (ret) {
+		DRM_ERROR("ci_update_vce_dpm failed\n");
+		return ret;
+	}
+#endif
+	ret = ci_update_sclk_t(rdev);
+	if (ret) {
+		DRM_ERROR("ci_update_sclk_t failed\n");
+		return ret;
+	}
+	if (pi->caps_dynamic_ac_timing) {
+		ret = ci_update_and_upload_mc_reg_table(rdev);
+		if (ret) {
+			DRM_ERROR("ci_update_and_upload_mc_reg_table failed\n");
+			return ret;
+		}
+	}
+	ret = ci_program_memory_timing_parameters(rdev);
+	if (ret) {
+		DRM_ERROR("ci_program_memory_timing_parameters failed\n");
+		return ret;
+	}
+	ret = ci_unfreeze_sclk_mclk_dpm(rdev);
+	if (ret) {
+		DRM_ERROR("ci_unfreeze_sclk_mclk_dpm failed\n");
+		return ret;
+	}
+	ret = ci_upload_dpm_level_enable_mask(rdev);
+	if (ret) {
+		DRM_ERROR("ci_upload_dpm_level_enable_mask failed\n");
+		return ret;
+	}
+	if (pi->pcie_performance_request)
+		ci_notify_link_speed_change_after_state_change(rdev, new_ps, old_ps);
+
+	ret = ci_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO);
+	if (ret) {
+		DRM_ERROR("ci_dpm_force_performance_level failed\n");
+		return ret;
+	}
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_MC |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_UVD |
+			     RADEON_CG_BLOCK_HDP), true);
+
+	return 0;
+}
+
+int ci_dpm_power_control_set_level(struct radeon_device *rdev)
+{
+	return ci_power_control_set_level(rdev);
+}
+
+void ci_dpm_reset_asic(struct radeon_device *rdev)
+{
+	ci_set_boot_state(rdev);
+}
+
+void ci_dpm_display_configuration_changed(struct radeon_device *rdev)
+{
+	ci_program_display_gap(rdev);
+}
+
+union power_info {
+	struct _ATOM_POWERPLAY_INFO info;
+	struct _ATOM_POWERPLAY_INFO_V2 info_2;
+	struct _ATOM_POWERPLAY_INFO_V3 info_3;
+	struct _ATOM_PPLIB_POWERPLAYTABLE pplib;
+	struct _ATOM_PPLIB_POWERPLAYTABLE2 pplib2;
+	struct _ATOM_PPLIB_POWERPLAYTABLE3 pplib3;
+};
+
+union pplib_clock_info {
+	struct _ATOM_PPLIB_R600_CLOCK_INFO r600;
+	struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780;
+	struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen;
+	struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo;
+	struct _ATOM_PPLIB_SI_CLOCK_INFO si;
+	struct _ATOM_PPLIB_CI_CLOCK_INFO ci;
+};
+
+union pplib_power_state {
+	struct _ATOM_PPLIB_STATE v1;
+	struct _ATOM_PPLIB_STATE_V2 v2;
+};
+
+static void ci_parse_pplib_non_clock_info(struct radeon_device *rdev,
+					  struct radeon_ps *rps,
+					  struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info,
+					  u8 table_rev)
+{
+	rps->caps = le32_to_cpu(non_clock_info->ulCapsAndSettings);
+	rps->class = le16_to_cpu(non_clock_info->usClassification);
+	rps->class2 = le16_to_cpu(non_clock_info->usClassification2);
+
+	if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) {
+		rps->vclk = le32_to_cpu(non_clock_info->ulVCLK);
+		rps->dclk = le32_to_cpu(non_clock_info->ulDCLK);
+	} else {
+		rps->vclk = 0;
+		rps->dclk = 0;
+	}
+
+	if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT)
+		rdev->pm.dpm.boot_ps = rps;
+	if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE)
+		rdev->pm.dpm.uvd_ps = rps;
+}
+
+static void ci_parse_pplib_clock_info(struct radeon_device *rdev,
+				      struct radeon_ps *rps, int index,
+				      union pplib_clock_info *clock_info)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_ps *ps = ci_get_ps(rps);
+	struct ci_pl *pl = &ps->performance_levels[index];
+
+	ps->performance_level_count = index + 1;
+
+	pl->sclk = le16_to_cpu(clock_info->ci.usEngineClockLow);
+	pl->sclk |= clock_info->ci.ucEngineClockHigh << 16;
+	pl->mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow);
+	pl->mclk |= clock_info->ci.ucMemoryClockHigh << 16;
+
+	pl->pcie_gen = r600_get_pcie_gen_support(rdev,
+						 pi->sys_pcie_mask,
+						 pi->vbios_boot_state.pcie_gen_bootup_value,
+						 clock_info->ci.ucPCIEGen);
+	pl->pcie_lane = r600_get_pcie_lane_support(rdev,
+						   pi->vbios_boot_state.pcie_lane_bootup_value,
+						   le16_to_cpu(clock_info->ci.usPCIELane));
+
+	if (rps->class & ATOM_PPLIB_CLASSIFICATION_ACPI) {
+		pi->acpi_pcie_gen = pl->pcie_gen;
+	}
+
+	if (rps->class2 & ATOM_PPLIB_CLASSIFICATION2_ULV) {
+		pi->ulv.supported = true;
+		pi->ulv.pl = *pl;
+		pi->ulv.cg_ulv_parameter = CISLANDS_CGULVPARAMETER_DFLT;
+	}
+
+	/* patch up boot state */
+	if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) {
+		pl->mclk = pi->vbios_boot_state.mclk_bootup_value;
+		pl->sclk = pi->vbios_boot_state.sclk_bootup_value;
+		pl->pcie_gen = pi->vbios_boot_state.pcie_gen_bootup_value;
+		pl->pcie_lane = pi->vbios_boot_state.pcie_lane_bootup_value;
+	}
+
+	switch (rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) {
+	case ATOM_PPLIB_CLASSIFICATION_UI_BATTERY:
+		pi->use_pcie_powersaving_levels = true;
+		if (pi->pcie_gen_powersaving.max < pl->pcie_gen)
+			pi->pcie_gen_powersaving.max = pl->pcie_gen;
+		if (pi->pcie_gen_powersaving.min > pl->pcie_gen)
+			pi->pcie_gen_powersaving.min = pl->pcie_gen;
+		if (pi->pcie_lane_powersaving.max < pl->pcie_lane)
+			pi->pcie_lane_powersaving.max = pl->pcie_lane;
+		if (pi->pcie_lane_powersaving.min > pl->pcie_lane)
+			pi->pcie_lane_powersaving.min = pl->pcie_lane;
+		break;
+	case ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE:
+		pi->use_pcie_performance_levels = true;
+		if (pi->pcie_gen_performance.max < pl->pcie_gen)
+			pi->pcie_gen_performance.max = pl->pcie_gen;
+		if (pi->pcie_gen_performance.min > pl->pcie_gen)
+			pi->pcie_gen_performance.min = pl->pcie_gen;
+		if (pi->pcie_lane_performance.max < pl->pcie_lane)
+			pi->pcie_lane_performance.max = pl->pcie_lane;
+		if (pi->pcie_lane_performance.min > pl->pcie_lane)
+			pi->pcie_lane_performance.min = pl->pcie_lane;
+		break;
+	default:
+		break;
+	}
+}
+
+static int ci_parse_power_table(struct radeon_device *rdev)
+{
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info;
+	union pplib_power_state *power_state;
+	int i, j, k, non_clock_array_index, clock_array_index;
+	union pplib_clock_info *clock_info;
+	struct _StateArray *state_array;
+	struct _ClockInfoArray *clock_info_array;
+	struct _NonClockInfoArray *non_clock_info_array;
+	union power_info *power_info;
+	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
+        u16 data_offset;
+	u8 frev, crev;
+	u8 *power_state_offset;
+	struct ci_ps *ps;
+
+	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
+				   &frev, &crev, &data_offset))
+		return -EINVAL;
+	power_info = (union power_info *)(mode_info->atom_context->bios + data_offset);
+
+	state_array = (struct _StateArray *)
+		(mode_info->atom_context->bios + data_offset +
+		 le16_to_cpu(power_info->pplib.usStateArrayOffset));
+	clock_info_array = (struct _ClockInfoArray *)
+		(mode_info->atom_context->bios + data_offset +
+		 le16_to_cpu(power_info->pplib.usClockInfoArrayOffset));
+	non_clock_info_array = (struct _NonClockInfoArray *)
+		(mode_info->atom_context->bios + data_offset +
+		 le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
+
+	rdev->pm.dpm.ps = kzalloc(sizeof(struct radeon_ps) *
+				  state_array->ucNumEntries, GFP_KERNEL);
+	if (!rdev->pm.dpm.ps)
+		return -ENOMEM;
+	power_state_offset = (u8 *)state_array->states;
+	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
+	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
+	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
+	for (i = 0; i < state_array->ucNumEntries; i++) {
+		u8 *idx;
+		power_state = (union pplib_power_state *)power_state_offset;
+		non_clock_array_index = power_state->v2.nonClockInfoIndex;
+		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
+			&non_clock_info_array->nonClockInfo[non_clock_array_index];
+		if (!rdev->pm.power_state[i].clock_info)
+			return -EINVAL;
+		ps = kzalloc(sizeof(struct ci_ps), GFP_KERNEL);
+		if (ps == NULL) {
+			kfree(rdev->pm.dpm.ps);
+			return -ENOMEM;
+		}
+		rdev->pm.dpm.ps[i].ps_priv = ps;
+		ci_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i],
+					      non_clock_info,
+					      non_clock_info_array->ucEntrySize);
+		k = 0;
+		idx = (u8 *)&power_state->v2.clockInfoIndex[0];
+		for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) {
+			clock_array_index = idx[j];
+			if (clock_array_index >= clock_info_array->ucNumEntries)
+				continue;
+			if (k >= CISLANDS_MAX_HARDWARE_POWERLEVELS)
+				break;
+			clock_info = (union pplib_clock_info *)
+				((u8 *)&clock_info_array->clockInfo[0] +
+				 (clock_array_index * clock_info_array->ucEntrySize));
+			ci_parse_pplib_clock_info(rdev,
+						  &rdev->pm.dpm.ps[i], k,
+						  clock_info);
+			k++;
+		}
+		power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
+	}
+	rdev->pm.dpm.num_ps = state_array->ucNumEntries;
+	return 0;
+}
+
+int ci_get_vbios_boot_values(struct radeon_device *rdev,
+			     struct ci_vbios_boot_state *boot_state)
+{
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, FirmwareInfo);
+	ATOM_FIRMWARE_INFO_V2_2 *firmware_info;
+	u8 frev, crev;
+	u16 data_offset;
+
+	if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+				   &frev, &crev, &data_offset)) {
+		firmware_info =
+			(ATOM_FIRMWARE_INFO_V2_2 *)(mode_info->atom_context->bios +
+						    data_offset);
+		boot_state->mvdd_bootup_value = le16_to_cpu(firmware_info->usBootUpMVDDCVoltage);
+		boot_state->vddc_bootup_value = le16_to_cpu(firmware_info->usBootUpVDDCVoltage);
+		boot_state->vddci_bootup_value = le16_to_cpu(firmware_info->usBootUpVDDCIVoltage);
+		boot_state->pcie_gen_bootup_value = ci_get_current_pcie_speed(rdev);
+		boot_state->pcie_lane_bootup_value = ci_get_current_pcie_lane_number(rdev);
+		boot_state->sclk_bootup_value = le32_to_cpu(firmware_info->ulDefaultEngineClock);
+		boot_state->mclk_bootup_value = le32_to_cpu(firmware_info->ulDefaultMemoryClock);
+
+		return 0;
+	}
+	return -EINVAL;
+}
+
+void ci_dpm_fini(struct radeon_device *rdev)
+{
+	int i;
+
+	for (i = 0; i < rdev->pm.dpm.num_ps; i++) {
+		kfree(rdev->pm.dpm.ps[i].ps_priv);
+	}
+	kfree(rdev->pm.dpm.ps);
+	kfree(rdev->pm.dpm.priv);
+	kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries);
+	r600_free_extended_power_table(rdev);
+}
+
+int ci_dpm_init(struct radeon_device *rdev)
+{
+	int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
+	u16 data_offset, size;
+	u8 frev, crev;
+	struct ci_power_info *pi;
+	int ret;
+	u32 mask;
+
+	pi = kzalloc(sizeof(struct ci_power_info), GFP_KERNEL);
+	if (pi == NULL)
+		return -ENOMEM;
+	rdev->pm.dpm.priv = pi;
+
+	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+	if (ret)
+		pi->sys_pcie_mask = 0;
+	else
+		pi->sys_pcie_mask = mask;
+	pi->force_pcie_gen = RADEON_PCIE_GEN_INVALID;
+
+	pi->pcie_gen_performance.max = RADEON_PCIE_GEN1;
+	pi->pcie_gen_performance.min = RADEON_PCIE_GEN3;
+	pi->pcie_gen_powersaving.max = RADEON_PCIE_GEN1;
+	pi->pcie_gen_powersaving.min = RADEON_PCIE_GEN3;
+
+	pi->pcie_lane_performance.max = 0;
+	pi->pcie_lane_performance.min = 16;
+	pi->pcie_lane_powersaving.max = 0;
+	pi->pcie_lane_powersaving.min = 16;
+
+	ret = ci_get_vbios_boot_values(rdev, &pi->vbios_boot_state);
+	if (ret) {
+		ci_dpm_fini(rdev);
+		return ret;
+	}
+	ret = ci_parse_power_table(rdev);
+	if (ret) {
+		ci_dpm_fini(rdev);
+		return ret;
+	}
+	ret = r600_parse_extended_power_table(rdev);
+	if (ret) {
+		ci_dpm_fini(rdev);
+		return ret;
+	}
+
+        pi->dll_default_on = false;
+        pi->sram_end = SMC_RAM_END;
+
+	pi->activity_target[0] = CISLAND_TARGETACTIVITY_DFLT;
+	pi->activity_target[1] = CISLAND_TARGETACTIVITY_DFLT;
+	pi->activity_target[2] = CISLAND_TARGETACTIVITY_DFLT;
+	pi->activity_target[3] = CISLAND_TARGETACTIVITY_DFLT;
+	pi->activity_target[4] = CISLAND_TARGETACTIVITY_DFLT;
+	pi->activity_target[5] = CISLAND_TARGETACTIVITY_DFLT;
+	pi->activity_target[6] = CISLAND_TARGETACTIVITY_DFLT;
+	pi->activity_target[7] = CISLAND_TARGETACTIVITY_DFLT;
+
+	pi->mclk_activity_target = CISLAND_MCLK_TARGETACTIVITY_DFLT;
+
+	pi->sclk_dpm_key_disabled = 0;
+	pi->mclk_dpm_key_disabled = 0;
+	pi->pcie_dpm_key_disabled = 0;
+
+	pi->caps_sclk_ds = true;
+
+	pi->mclk_strobe_mode_threshold = 40000;
+	pi->mclk_stutter_mode_threshold = 40000;
+	pi->mclk_edc_enable_threshold = 40000;
+	pi->mclk_edc_wr_enable_threshold = 40000;
+
+	ci_initialize_powertune_defaults(rdev);
+
+	pi->caps_fps = false;
+
+	pi->caps_sclk_throttle_low_notification = false;
+
+	pi->caps_uvd_dpm = true;
+
+        ci_get_leakage_voltages(rdev);
+        ci_patch_dependency_tables_with_leakage(rdev);
+        ci_set_private_data_variables_based_on_pptable(rdev);
+
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries =
+		kzalloc(4 * sizeof(struct radeon_clock_voltage_dependency_entry), GFP_KERNEL);
+	if (!rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries) {
+		ci_dpm_fini(rdev);
+		return -ENOMEM;
+	}
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.count = 4;
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].clk = 0;
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[0].v = 0;
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[1].clk = 36000;
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[1].v = 720;
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[2].clk = 54000;
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[2].v = 810;
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[3].clk = 72000;
+	rdev->pm.dpm.dyn_state.vddc_dependency_on_dispclk.entries[3].v = 900;
+
+	rdev->pm.dpm.dyn_state.mclk_sclk_ratio = 4;
+	rdev->pm.dpm.dyn_state.sclk_mclk_delta = 15000;
+	rdev->pm.dpm.dyn_state.vddc_vddci_delta = 200;
+
+	rdev->pm.dpm.dyn_state.valid_sclk_values.count = 0;
+	rdev->pm.dpm.dyn_state.valid_sclk_values.values = NULL;
+	rdev->pm.dpm.dyn_state.valid_mclk_values.count = 0;
+	rdev->pm.dpm.dyn_state.valid_mclk_values.values = NULL;
+
+	pi->thermal_temp_setting.temperature_low = 99500;
+	pi->thermal_temp_setting.temperature_high = 100000;
+	pi->thermal_temp_setting.temperature_shutdown = 104000;
+
+	pi->uvd_enabled = false;
+
+	pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_NONE;
+	pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_NONE;
+	pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_NONE;
+	if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_GPIO_LUT))
+		pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO;
+	else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
+		pi->voltage_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2;
+
+	if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL) {
+		if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_GPIO_LUT))
+			pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO;
+		else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_VDDCI, VOLTAGE_OBJ_SVID2))
+			pi->vddci_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2;
+		else
+			rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL;
+        }
+
+	if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_MVDDCONTROL) {
+		if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_GPIO_LUT))
+			pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_BY_GPIO;
+		else if (radeon_atom_is_voltage_gpio(rdev, VOLTAGE_TYPE_MVDDC, VOLTAGE_OBJ_SVID2))
+			pi->mvdd_control = CISLANDS_VOLTAGE_CONTROL_BY_SVID2;
+		else
+			rdev->pm.dpm.platform_caps &= ~ATOM_PP_PLATFORM_CAP_MVDDCONTROL;
+	}
+
+	pi->vddc_phase_shed_control = true;
+
+#if defined(CONFIG_ACPI)
+	pi->pcie_performance_request =
+		radeon_acpi_is_pcie_performance_request_supported(rdev);
+#else
+	pi->pcie_performance_request = false;
+#endif
+
+	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
+                                   &frev, &crev, &data_offset)) {
+		pi->caps_sclk_ss_support = true;
+		pi->caps_mclk_ss_support = true;
+		pi->dynamic_ss = true;
+	} else {
+		pi->caps_sclk_ss_support = false;
+		pi->caps_mclk_ss_support = false;
+		pi->dynamic_ss = true;
+	}
+
+	if (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE)
+		pi->thermal_protection = true;
+	else
+		pi->thermal_protection = false;
+
+	pi->caps_dynamic_ac_timing = true;
+
+	pi->uvd_power_gated = false;
+
+	/* make sure dc limits are valid */
+	if ((rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) ||
+	    (rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0))
+		rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc =
+			rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+
+	return 0;
+}
+
+void ci_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
+						    struct seq_file *m)
+{
+	u32 sclk = ci_get_average_sclk_freq(rdev);
+	u32 mclk = ci_get_average_mclk_freq(rdev);
+
+	seq_printf(m, "power level avg    sclk: %u mclk: %u\n",
+		   sclk, mclk);
+}
+
+void ci_dpm_print_power_state(struct radeon_device *rdev,
+			      struct radeon_ps *rps)
+{
+	struct ci_ps *ps = ci_get_ps(rps);
+	struct ci_pl *pl;
+	int i;
+
+	r600_dpm_print_class_info(rps->class, rps->class2);
+	r600_dpm_print_cap_info(rps->caps);
+	printk("\tuvd    vclk: %d dclk: %d\n", rps->vclk, rps->dclk);
+	for (i = 0; i < ps->performance_level_count; i++) {
+		pl = &ps->performance_levels[i];
+		printk("\t\tpower level %d    sclk: %u mclk: %u pcie gen: %u pcie lanes: %u\n",
+		       i, pl->sclk, pl->mclk, pl->pcie_gen + 1, pl->pcie_lane);
+	}
+	r600_dpm_print_ps_status(rdev, rps);
+}
+
+u32 ci_dpm_get_sclk(struct radeon_device *rdev, bool low)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps);
+
+	if (low)
+		return requested_state->performance_levels[0].sclk;
+	else
+		return requested_state->performance_levels[requested_state->performance_level_count - 1].sclk;
+}
+
+u32 ci_dpm_get_mclk(struct radeon_device *rdev, bool low)
+{
+	struct ci_power_info *pi = ci_get_pi(rdev);
+	struct ci_ps *requested_state = ci_get_ps(&pi->requested_rps);
+
+	if (low)
+		return requested_state->performance_levels[0].mclk;
+	else
+		return requested_state->performance_levels[requested_state->performance_level_count - 1].mclk;
+}
diff --git a/drivers/gpu/drm/radeon/ci_dpm.h b/drivers/gpu/drm/radeon/ci_dpm.h
new file mode 100644
index 0000000..93bbed9
--- /dev/null
+++ b/drivers/gpu/drm/radeon/ci_dpm.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __CI_DPM_H__
+#define __CI_DPM_H__
+
+#include "ppsmc.h"
+
+#define SMU__NUM_SCLK_DPM_STATE  8
+#define SMU__NUM_MCLK_DPM_LEVELS 6
+#define SMU__NUM_LCLK_DPM_LEVELS 8
+#define SMU__NUM_PCIE_DPM_LEVELS 8
+#include "smu7_discrete.h"
+
+#define CISLANDS_MAX_HARDWARE_POWERLEVELS 2
+
+struct ci_pl {
+	u32 mclk;
+	u32 sclk;
+	enum radeon_pcie_gen pcie_gen;
+	u16 pcie_lane;
+};
+
+struct ci_ps {
+	u16 performance_level_count;
+	bool dc_compatible;
+	u32 sclk_t;
+	struct ci_pl performance_levels[CISLANDS_MAX_HARDWARE_POWERLEVELS];
+};
+
+struct ci_dpm_level {
+	bool enabled;
+	u32 value;
+	u32 param1;
+};
+
+#define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5
+#define MAX_REGULAR_DPM_NUMBER 8
+#define CISLAND_MINIMUM_ENGINE_CLOCK 800
+
+struct ci_single_dpm_table {
+	u32 count;
+	struct ci_dpm_level dpm_levels[MAX_REGULAR_DPM_NUMBER];
+};
+
+struct ci_dpm_table {
+	struct ci_single_dpm_table sclk_table;
+	struct ci_single_dpm_table mclk_table;
+	struct ci_single_dpm_table pcie_speed_table;
+	struct ci_single_dpm_table vddc_table;
+	struct ci_single_dpm_table vddci_table;
+	struct ci_single_dpm_table mvdd_table;
+};
+
+struct ci_mc_reg_entry {
+	u32 mclk_max;
+	u32 mc_data[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE];
+};
+
+struct ci_mc_reg_table {
+	u8 last;
+	u8 num_entries;
+	u16 valid_flag;
+	struct ci_mc_reg_entry mc_reg_table_entry[MAX_AC_TIMING_ENTRIES];
+	SMU7_Discrete_MCRegisterAddress mc_reg_address[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE];
+};
+
+struct ci_ulv_parm
+{
+	bool supported;
+	u32 cg_ulv_parameter;
+	u32 volt_change_delay;
+	struct ci_pl pl;
+};
+
+#define CISLANDS_MAX_LEAKAGE_COUNT  8
+
+struct ci_leakage_voltage {
+	u16 count;
+	u16 leakage_id[CISLANDS_MAX_LEAKAGE_COUNT];
+	u16 actual_voltage[CISLANDS_MAX_LEAKAGE_COUNT];
+};
+
+struct ci_dpm_level_enable_mask {
+	u32 uvd_dpm_enable_mask;
+	u32 vce_dpm_enable_mask;
+	u32 acp_dpm_enable_mask;
+	u32 samu_dpm_enable_mask;
+	u32 sclk_dpm_enable_mask;
+	u32 mclk_dpm_enable_mask;
+	u32 pcie_dpm_enable_mask;
+};
+
+struct ci_vbios_boot_state
+{
+	u16 mvdd_bootup_value;
+	u16 vddc_bootup_value;
+	u16 vddci_bootup_value;
+	u32 sclk_bootup_value;
+	u32 mclk_bootup_value;
+	u16 pcie_gen_bootup_value;
+	u16 pcie_lane_bootup_value;
+};
+
+struct ci_clock_registers {
+	u32 cg_spll_func_cntl;
+	u32 cg_spll_func_cntl_2;
+	u32 cg_spll_func_cntl_3;
+	u32 cg_spll_func_cntl_4;
+	u32 cg_spll_spread_spectrum;
+	u32 cg_spll_spread_spectrum_2;
+	u32 dll_cntl;
+	u32 mclk_pwrmgt_cntl;
+	u32 mpll_ad_func_cntl;
+	u32 mpll_dq_func_cntl;
+	u32 mpll_func_cntl;
+	u32 mpll_func_cntl_1;
+	u32 mpll_func_cntl_2;
+	u32 mpll_ss1;
+	u32 mpll_ss2;
+};
+
+struct ci_thermal_temperature_setting {
+	s32 temperature_low;
+	s32 temperature_high;
+	s32 temperature_shutdown;
+};
+
+struct ci_pcie_perf_range {
+	u16 max;
+	u16 min;
+};
+
+enum ci_pt_config_reg_type {
+	CISLANDS_CONFIGREG_MMR = 0,
+	CISLANDS_CONFIGREG_SMC_IND,
+	CISLANDS_CONFIGREG_DIDT_IND,
+	CISLANDS_CONFIGREG_CACHE,
+	CISLANDS_CONFIGREG_MAX
+};
+
+#define POWERCONTAINMENT_FEATURE_BAPM            0x00000001
+#define POWERCONTAINMENT_FEATURE_TDCLimit        0x00000002
+#define POWERCONTAINMENT_FEATURE_PkgPwrLimit     0x00000004
+
+struct ci_pt_config_reg {
+	u32 offset;
+	u32 mask;
+	u32 shift;
+	u32 value;
+	enum ci_pt_config_reg_type type;
+};
+
+struct ci_pt_defaults {
+	u8 svi_load_line_en;
+	u8 svi_load_line_vddc;
+	u8 tdc_vddc_throttle_release_limit_perc;
+	u8 tdc_mawt;
+	u8 tdc_waterfall_ctl;
+	u8 dte_ambient_temp_base;
+	u32 display_cac;
+	u32 bapm_temp_gradient;
+	u16 bapmti_r[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS];
+	u16 bapmti_rc[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS];
+};
+
+#define DPMTABLE_OD_UPDATE_SCLK     0x00000001
+#define DPMTABLE_OD_UPDATE_MCLK     0x00000002
+#define DPMTABLE_UPDATE_SCLK        0x00000004
+#define DPMTABLE_UPDATE_MCLK        0x00000008
+
+struct ci_power_info {
+	struct ci_dpm_table dpm_table;
+	u32 voltage_control;
+	u32 mvdd_control;
+	u32 vddci_control;
+	u32 active_auto_throttle_sources;
+	struct ci_clock_registers clock_registers;
+	u16 acpi_vddc;
+	u16 acpi_vddci;
+	enum radeon_pcie_gen force_pcie_gen;
+	enum radeon_pcie_gen acpi_pcie_gen;
+	struct ci_leakage_voltage vddc_leakage;
+	struct ci_leakage_voltage vddci_leakage;
+	u16 max_vddc_in_pp_table;
+	u16 min_vddc_in_pp_table;
+	u16 max_vddci_in_pp_table;
+	u16 min_vddci_in_pp_table;
+	u32 mclk_strobe_mode_threshold;
+	u32 mclk_stutter_mode_threshold;
+	u32 mclk_edc_enable_threshold;
+	u32 mclk_edc_wr_enable_threshold;
+	struct ci_vbios_boot_state vbios_boot_state;
+	/* smc offsets */
+	u32 sram_end;
+	u32 dpm_table_start;
+	u32 soft_regs_start;
+	u32 mc_reg_table_start;
+	u32 fan_table_start;
+	u32 arb_table_start;
+	/* smc tables */
+	SMU7_Discrete_DpmTable smc_state_table;
+	SMU7_Discrete_MCRegisters smc_mc_reg_table;
+	SMU7_Discrete_PmFuses smc_powertune_table;
+	/* other stuff */
+	struct ci_mc_reg_table mc_reg_table;
+	struct atom_voltage_table vddc_voltage_table;
+	struct atom_voltage_table vddci_voltage_table;
+	struct atom_voltage_table mvdd_voltage_table;
+	struct ci_ulv_parm ulv;
+	u32 power_containment_features;
+	const struct ci_pt_defaults *powertune_defaults;
+	u32 dte_tj_offset;
+	bool vddc_phase_shed_control;
+	struct ci_thermal_temperature_setting thermal_temp_setting;
+	struct ci_dpm_level_enable_mask dpm_level_enable_mask;
+	u32 need_update_smu7_dpm_table;
+	u32 sclk_dpm_key_disabled;
+	u32 mclk_dpm_key_disabled;
+	u32 pcie_dpm_key_disabled;
+	struct ci_pcie_perf_range pcie_gen_performance;
+	struct ci_pcie_perf_range pcie_lane_performance;
+	struct ci_pcie_perf_range pcie_gen_powersaving;
+	struct ci_pcie_perf_range pcie_lane_powersaving;
+	u32 activity_target[SMU7_MAX_LEVELS_GRAPHICS];
+	u32 mclk_activity_target;
+	u32 low_sclk_interrupt_t;
+	u32 last_mclk_dpm_enable_mask;
+	u32 sys_pcie_mask;
+	/* caps */
+	bool caps_power_containment;
+	bool caps_cac;
+	bool caps_sq_ramping;
+	bool caps_db_ramping;
+	bool caps_td_ramping;
+	bool caps_tcp_ramping;
+	bool caps_fps;
+	bool caps_sclk_ds;
+	bool caps_sclk_ss_support;
+	bool caps_mclk_ss_support;
+	bool caps_uvd_dpm;
+	bool caps_vce_dpm;
+	bool caps_samu_dpm;
+	bool caps_acp_dpm;
+	bool caps_automatic_dc_transition;
+	bool caps_sclk_throttle_low_notification;
+	bool caps_dynamic_ac_timing;
+	/* flags */
+	bool thermal_protection;
+	bool pcie_performance_request;
+	bool dynamic_ss;
+	bool dll_default_on;
+	bool cac_enabled;
+	bool uvd_enabled;
+	bool battery_state;
+	bool pspp_notify_required;
+	bool mem_gddr5;
+	bool enable_bapm_feature;
+	bool enable_tdc_limit_feature;
+	bool enable_pkg_pwr_tracking_feature;
+	bool use_pcie_performance_levels;
+	bool use_pcie_powersaving_levels;
+	bool uvd_power_gated;
+	/* driver states */
+	struct radeon_ps current_rps;
+	struct ci_ps current_ps;
+	struct radeon_ps requested_rps;
+	struct ci_ps requested_ps;
+};
+
+#define CISLANDS_VOLTAGE_CONTROL_NONE                   0x0
+#define CISLANDS_VOLTAGE_CONTROL_BY_GPIO                0x1
+#define CISLANDS_VOLTAGE_CONTROL_BY_SVID2               0x2
+
+#define CISLANDS_Q88_FORMAT_CONVERSION_UNIT             256
+
+#define CISLANDS_VRC_DFLT0                              0x3FFFC000
+#define CISLANDS_VRC_DFLT1                              0x000400
+#define CISLANDS_VRC_DFLT2                              0xC00080
+#define CISLANDS_VRC_DFLT3                              0xC00200
+#define CISLANDS_VRC_DFLT4                              0xC01680
+#define CISLANDS_VRC_DFLT5                              0xC00033
+#define CISLANDS_VRC_DFLT6                              0xC00033
+#define CISLANDS_VRC_DFLT7                              0x3FFFC000
+
+#define CISLANDS_CGULVPARAMETER_DFLT                    0x00040035
+#define CISLAND_TARGETACTIVITY_DFLT                     30
+#define CISLAND_MCLK_TARGETACTIVITY_DFLT                10
+
+#define PCIE_PERF_REQ_REMOVE_REGISTRY   0
+#define PCIE_PERF_REQ_FORCE_LOWPOWER    1
+#define PCIE_PERF_REQ_PECI_GEN1         2
+#define PCIE_PERF_REQ_PECI_GEN2         3
+#define PCIE_PERF_REQ_PECI_GEN3         4
+
+int ci_copy_bytes_to_smc(struct radeon_device *rdev,
+			 u32 smc_start_address,
+			 const u8 *src, u32 byte_count, u32 limit);
+void ci_start_smc(struct radeon_device *rdev);
+void ci_reset_smc(struct radeon_device *rdev);
+int ci_program_jump_on_start(struct radeon_device *rdev);
+void ci_stop_smc_clock(struct radeon_device *rdev);
+void ci_start_smc_clock(struct radeon_device *rdev);
+bool ci_is_smc_running(struct radeon_device *rdev);
+PPSMC_Result ci_send_msg_to_smc(struct radeon_device *rdev, PPSMC_Msg msg);
+PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev);
+int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit);
+int ci_read_smc_sram_dword(struct radeon_device *rdev,
+			   u32 smc_address, u32 *value, u32 limit);
+int ci_write_smc_sram_dword(struct radeon_device *rdev,
+			    u32 smc_address, u32 value, u32 limit);
+
+#endif
diff --git a/drivers/gpu/drm/radeon/ci_smc.c b/drivers/gpu/drm/radeon/ci_smc.c
new file mode 100644
index 0000000..53b43dd
--- /dev/null
+++ b/drivers/gpu/drm/radeon/ci_smc.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+
+#include <linux/firmware.h>
+#include "drmP.h"
+#include "radeon.h"
+#include "cikd.h"
+#include "ppsmc.h"
+#include "radeon_ucode.h"
+
+static int ci_set_smc_sram_address(struct radeon_device *rdev,
+				   u32 smc_address, u32 limit)
+{
+	if (smc_address & 3)
+		return -EINVAL;
+	if ((smc_address + 3) > limit)
+		return -EINVAL;
+
+	WREG32(SMC_IND_INDEX_0, smc_address);
+	WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0);
+
+	return 0;
+}
+
+int ci_copy_bytes_to_smc(struct radeon_device *rdev,
+			 u32 smc_start_address,
+			 const u8 *src, u32 byte_count, u32 limit)
+{
+	u32 data, original_data;
+	u32 addr;
+	u32 extra_shift;
+	int ret;
+
+	if (smc_start_address & 3)
+		return -EINVAL;
+	if ((smc_start_address + byte_count) > limit)
+		return -EINVAL;
+
+	addr = smc_start_address;
+
+	while (byte_count >= 4) {
+		/* SMC address space is BE */
+		data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3];
+
+		ret = ci_set_smc_sram_address(rdev, addr, limit);
+		if (ret)
+			return ret;
+
+		WREG32(SMC_IND_DATA_0, data);
+
+		src += 4;
+		byte_count -= 4;
+		addr += 4;
+	}
+
+	/* RMW for the final bytes */
+	if (byte_count > 0) {
+		data = 0;
+
+		ret = ci_set_smc_sram_address(rdev, addr, limit);
+		if (ret)
+			return ret;
+
+		original_data = RREG32(SMC_IND_DATA_0);
+
+		extra_shift = 8 * (4 - byte_count);
+
+		while (byte_count > 0) {
+			data = (data << 8) + *src++;
+			byte_count--;
+		}
+
+		data <<= extra_shift;
+
+		data |= (original_data & ~((~0UL) << extra_shift));
+
+		ret = ci_set_smc_sram_address(rdev, addr, limit);
+		if (ret)
+			return ret;
+
+		WREG32(SMC_IND_DATA_0, data);
+	}
+	return 0;
+}
+
+void ci_start_smc(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL);
+
+	tmp &= ~RST_REG;
+	WREG32_SMC(SMC_SYSCON_RESET_CNTL, tmp);
+}
+
+void ci_reset_smc(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(SMC_SYSCON_RESET_CNTL);
+
+	tmp |= RST_REG;
+	WREG32_SMC(SMC_SYSCON_RESET_CNTL, tmp);
+}
+
+int ci_program_jump_on_start(struct radeon_device *rdev)
+{
+	static u8 data[] = { 0xE0, 0x00, 0x80, 0x40 };
+
+	return ci_copy_bytes_to_smc(rdev, 0x0, data, 4, sizeof(data)+1);
+}
+
+void ci_stop_smc_clock(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
+
+	tmp |= CK_DISABLE;
+
+	WREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0, tmp);
+}
+
+void ci_start_smc_clock(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
+
+	tmp &= ~CK_DISABLE;
+
+	WREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0, tmp);
+}
+
+bool ci_is_smc_running(struct radeon_device *rdev)
+{
+	u32 clk = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
+	u32 pc_c = RREG32_SMC(SMC_PC_C);
+
+	if (!(clk & CK_DISABLE) && (0x20100 <= pc_c))
+		return true;
+
+	return false;
+}
+
+PPSMC_Result ci_send_msg_to_smc(struct radeon_device *rdev, PPSMC_Msg msg)
+{
+	u32 tmp;
+	int i;
+
+	if (!ci_is_smc_running(rdev))
+		return PPSMC_Result_Failed;
+
+	WREG32(SMC_MESSAGE_0, msg);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(SMC_RESP_0);
+		if (tmp != 0)
+			break;
+		udelay(1);
+	}
+	tmp = RREG32(SMC_RESP_0);
+
+	return (PPSMC_Result)tmp;
+}
+
+PPSMC_Result ci_wait_for_smc_inactive(struct radeon_device *rdev)
+{
+	u32 tmp;
+	int i;
+
+	if (!ci_is_smc_running(rdev))
+		return PPSMC_Result_OK;
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+                tmp = RREG32_SMC(SMC_SYSCON_CLOCK_CNTL_0);
+                if ((tmp & CKEN) == 0)
+			break;
+                udelay(1);
+        }
+
+	return PPSMC_Result_OK;
+}
+
+int ci_load_smc_ucode(struct radeon_device *rdev, u32 limit)
+{
+	u32 ucode_start_address;
+	u32 ucode_size;
+	const u8 *src;
+	u32 data;
+
+	if (!rdev->smc_fw)
+		return -EINVAL;
+
+	switch (rdev->family) {
+	case CHIP_BONAIRE:
+		ucode_start_address = BONAIRE_SMC_UCODE_START;
+		ucode_size = BONAIRE_SMC_UCODE_SIZE;
+		break;
+	default:
+		DRM_ERROR("unknown asic in smc ucode loader\n");
+		BUG();
+	}
+
+	if (ucode_size & 3)
+		return -EINVAL;
+
+	src = (const u8 *)rdev->smc_fw->data;
+	WREG32(SMC_IND_INDEX_0, ucode_start_address);
+	WREG32_P(SMC_IND_ACCESS_CNTL, AUTO_INCREMENT_IND_0, ~AUTO_INCREMENT_IND_0);
+	while (ucode_size >= 4) {
+		/* SMC address space is BE */
+		data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3];
+
+		WREG32(SMC_IND_DATA_0, data);
+
+		src += 4;
+		ucode_size -= 4;
+	}
+	WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0);
+
+	return 0;
+}
+
+int ci_read_smc_sram_dword(struct radeon_device *rdev,
+			   u32 smc_address, u32 *value, u32 limit)
+{
+	int ret;
+
+	ret = ci_set_smc_sram_address(rdev, smc_address, limit);
+	if (ret)
+		return ret;
+
+	*value = RREG32(SMC_IND_DATA_0);
+	return 0;
+}
+
+int ci_write_smc_sram_dword(struct radeon_device *rdev,
+			    u32 smc_address, u32 value, u32 limit)
+{
+	int ret;
+
+	ret = ci_set_smc_sram_address(rdev, smc_address, limit);
+	if (ret)
+		return ret;
+
+	WREG32(SMC_IND_DATA_0, value);
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 6dacec4..a3bba05 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -30,22 +30,8 @@
 #include "cikd.h"
 #include "atom.h"
 #include "cik_blit_shaders.h"
-
-/* GFX */
-#define CIK_PFP_UCODE_SIZE 2144
-#define CIK_ME_UCODE_SIZE 2144
-#define CIK_CE_UCODE_SIZE 2144
-/* compute */
-#define CIK_MEC_UCODE_SIZE 4192
-/* interrupts */
-#define BONAIRE_RLC_UCODE_SIZE 2048
-#define KB_RLC_UCODE_SIZE 2560
-#define KV_RLC_UCODE_SIZE 2560
-/* gddr controller */
-#define CIK_MC_UCODE_SIZE 7866
-/* sdma */
-#define CIK_SDMA_UCODE_SIZE 1050
-#define CIK_SDMA_UCODE_VERSION 64
+#include "radeon_ucode.h"
+#include "clearstate_ci.h"
 
 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
@@ -54,6 +40,7 @@
 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
+MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
@@ -72,10 +59,61 @@
 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
+extern void sumo_rlc_fini(struct radeon_device *rdev);
+extern int sumo_rlc_init(struct radeon_device *rdev);
 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
-extern void si_rlc_fini(struct radeon_device *rdev);
-extern int si_rlc_init(struct radeon_device *rdev);
+extern void si_rlc_reset(struct radeon_device *rdev);
+extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
+extern int cik_sdma_resume(struct radeon_device *rdev);
+extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
+extern void cik_sdma_fini(struct radeon_device *rdev);
+extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
+				 struct radeon_ib *ib,
+				 uint64_t pe,
+				 uint64_t addr, unsigned count,
+				 uint32_t incr, uint32_t flags);
 static void cik_rlc_stop(struct radeon_device *rdev);
+static void cik_pcie_gen3_enable(struct radeon_device *rdev);
+static void cik_program_aspm(struct radeon_device *rdev);
+static void cik_init_pg(struct radeon_device *rdev);
+static void cik_init_cg(struct radeon_device *rdev);
+
+/* get temperature in millidegrees */
+int ci_get_temp(struct radeon_device *rdev)
+{
+	u32 temp;
+	int actual_temp = 0;
+
+	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
+		CTF_TEMP_SHIFT;
+
+	if (temp & 0x200)
+		actual_temp = 255;
+	else
+		actual_temp = temp & 0x1ff;
+
+	actual_temp = actual_temp * 1000;
+
+	return actual_temp;
+}
+
+/* get temperature in millidegrees */
+int kv_get_temp(struct radeon_device *rdev)
+{
+	u32 temp;
+	int actual_temp = 0;
+
+	temp = RREG32_SMC(0xC0300E0C);
+
+	if (temp)
+		actual_temp = (temp / 8) - 49;
+	else
+		actual_temp = 0;
+
+	actual_temp = actual_temp * 1000;
+
+	return actual_temp;
+}
 
 /*
  * Indirect registers accessor
@@ -98,6 +136,778 @@
 	(void)RREG32(PCIE_DATA);
 }
 
+static const u32 spectre_rlc_save_restore_register_list[] =
+{
+	(0x0e00 << 16) | (0xc12c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc140 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc150 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc15c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc168 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc170 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc178 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc204 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2b4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2b8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2bc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2c0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8228 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x829c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x869c >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x98f4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x98f8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9900 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc260 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x90e8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c000 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c00c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c1c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9700 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x8e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x9e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0xae00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0xbe00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x89bc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8900 >> 2),
+	0x00000000,
+	0x3,
+	(0x0e00 << 16) | (0xc130 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc134 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc1fc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc208 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc264 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc268 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc26c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc270 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc274 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc278 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc27c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc280 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc284 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc288 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc28c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc290 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc294 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc298 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc29c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2a0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2a4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2a8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2ac  >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2b0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x301d0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30238 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30250 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30254 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30258 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3025c >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x8e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x9e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0xae00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0xbe00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x8e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x9e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0xae00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0xbe00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x8e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x9e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0xae00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0xbe00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x8e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x9e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0xae00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0xbe00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x8e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x9e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0xae00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0xbe00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc99c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9834 >> 2),
+	0x00000000,
+	(0x0000 << 16) | (0x30f00 >> 2),
+	0x00000000,
+	(0x0001 << 16) | (0x30f00 >> 2),
+	0x00000000,
+	(0x0000 << 16) | (0x30f04 >> 2),
+	0x00000000,
+	(0x0001 << 16) | (0x30f04 >> 2),
+	0x00000000,
+	(0x0000 << 16) | (0x30f08 >> 2),
+	0x00000000,
+	(0x0001 << 16) | (0x30f08 >> 2),
+	0x00000000,
+	(0x0000 << 16) | (0x30f0c >> 2),
+	0x00000000,
+	(0x0001 << 16) | (0x30f0c >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x9b7c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8a14 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8a18 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a00 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8bf0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8bcc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8b24 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30a04 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a10 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a14 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a18 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a2c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc700 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc704 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc708 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc768 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc770 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc774 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc778 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc77c >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc780 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc784 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc788 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc78c >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc798 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc79c >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc7a0 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc7a4 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc7a8 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc7ac >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc7b0 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc7b4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9100 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c010 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92a8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92ac >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92b4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92b8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92bc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92c0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92c4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92c8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92cc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x92d0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c00 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c04 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c20 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c38 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c3c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xae00 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9604 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac08 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac0c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac10 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac14 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac58 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac68 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac6c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac70 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac74 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac78 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac7c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac80 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac84 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac88 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac8c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x970c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9714 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9718 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x971c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x8e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x9e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0xae00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0xbe00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xcd10 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xcd14 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88b0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88b4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88b8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88bc >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0x89c0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88c4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88c8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88d0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88d4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88d8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8980 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30938 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3093c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30940 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x89a0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30900 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30904 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x89b4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c210 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c214 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c218 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8904 >> 2),
+	0x00000000,
+	0x5,
+	(0x0e00 << 16) | (0x8c28 >> 2),
+	(0x0e00 << 16) | (0x8c2c >> 2),
+	(0x0e00 << 16) | (0x8c30 >> 2),
+	(0x0e00 << 16) | (0x8c34 >> 2),
+	(0x0e00 << 16) | (0x9600 >> 2),
+};
+
+static const u32 kalindi_rlc_save_restore_register_list[] =
+{
+	(0x0e00 << 16) | (0xc12c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc140 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc150 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc15c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc168 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc170 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc204 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2b4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2b8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2bc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2c0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8228 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x829c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x869c >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x98f4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x98f8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9900 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc260 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x90e8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c000 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c00c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c1c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9700 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xcd20 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x89bc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8900 >> 2),
+	0x00000000,
+	0x3,
+	(0x0e00 << 16) | (0xc130 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc134 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc1fc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc208 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc264 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc268 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc26c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc270 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc274 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc28c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc290 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc294 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc298 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2a0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2a4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2a8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc2ac >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x301d0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30238 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30250 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30254 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30258 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3025c >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc900 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc904 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc908 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc90c >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0xc910 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc99c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9834 >> 2),
+	0x00000000,
+	(0x0000 << 16) | (0x30f00 >> 2),
+	0x00000000,
+	(0x0000 << 16) | (0x30f04 >> 2),
+	0x00000000,
+	(0x0000 << 16) | (0x30f08 >> 2),
+	0x00000000,
+	(0x0000 << 16) | (0x30f0c >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x9b7c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8a14 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8a18 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a00 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8bf0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8bcc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8b24 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30a04 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a10 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a14 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a18 >> 2),
+	0x00000000,
+	(0x0600 << 16) | (0x30a2c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc700 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc704 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc708 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xc768 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc770 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc774 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc798 >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0xc79c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9100 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c010 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c00 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c04 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c20 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c38 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8c3c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xae00 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9604 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac08 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac0c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac10 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac14 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac58 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac68 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac6c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac70 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac74 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac78 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac7c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac80 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac84 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac88 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xac8c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x970c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9714 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x9718 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x971c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x4e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x5e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x6e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x7e00 << 16) | (0x31068 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xcd10 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0xcd14 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88b0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88b4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88b8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88bc >> 2),
+	0x00000000,
+	(0x0400 << 16) | (0x89c0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88c4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88c8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88d0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88d4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x88d8 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8980 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30938 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3093c >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30940 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x89a0 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30900 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x30904 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x89b4 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3e1fc >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c210 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c214 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x3c218 >> 2),
+	0x00000000,
+	(0x0e00 << 16) | (0x8904 >> 2),
+	0x00000000,
+	0x5,
+	(0x0e00 << 16) | (0x8c28 >> 2),
+	(0x0e00 << 16) | (0x8c2c >> 2),
+	(0x0e00 << 16) | (0x8c30 >> 2),
+	(0x0e00 << 16) | (0x8c34 >> 2),
+	(0x0e00 << 16) | (0x9600 >> 2),
+};
+
 static const u32 bonaire_golden_spm_registers[] =
 {
 	0x30800, 0xe0ffffff, 0xe0000000
@@ -744,7 +1554,7 @@
 	const char *chip_name;
 	size_t pfp_req_size, me_req_size, ce_req_size,
 		mec_req_size, rlc_req_size, mc_req_size,
-		sdma_req_size;
+		sdma_req_size, smc_req_size;
 	char fw_name[30];
 	int err;
 
@@ -760,6 +1570,7 @@
 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
+		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
 		break;
 	case CHIP_KAVERI:
 		chip_name = "KAVERI";
@@ -851,7 +1662,7 @@
 		err = -EINVAL;
 	}
 
-	/* No MC ucode on APUs */
+	/* No SMC, MC ucode on APUs */
 	if (!(rdev->flags & RADEON_IS_IGP)) {
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
@@ -863,6 +1674,21 @@
 			       rdev->mc_fw->size, fw_name);
 			err = -EINVAL;
 		}
+
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
+		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
+		if (err) {
+			printk(KERN_ERR
+			       "smc: error loading firmware \"%s\"\n",
+			       fw_name);
+			release_firmware(rdev->smc_fw);
+			rdev->smc_fw = NULL;
+		} else if (rdev->smc_fw->size != smc_req_size) {
+			printk(KERN_ERR
+			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
+			       rdev->smc_fw->size, fw_name);
+			err = -EINVAL;
+		}
 	}
 
 out:
@@ -881,6 +1707,8 @@
 		rdev->rlc_fw = NULL;
 		release_firmware(rdev->mc_fw);
 		rdev->mc_fw = NULL;
+		release_firmware(rdev->smc_fw);
+		rdev->smc_fw = NULL;
 	}
 	return err;
 }
@@ -1880,7 +2708,46 @@
 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_KAVERI:
-		/* TODO */
+		rdev->config.cik.max_shader_engines = 1;
+		rdev->config.cik.max_tile_pipes = 4;
+		if ((rdev->pdev->device == 0x1304) ||
+		    (rdev->pdev->device == 0x1305) ||
+		    (rdev->pdev->device == 0x130C) ||
+		    (rdev->pdev->device == 0x130F) ||
+		    (rdev->pdev->device == 0x1310) ||
+		    (rdev->pdev->device == 0x1311) ||
+		    (rdev->pdev->device == 0x131C)) {
+			rdev->config.cik.max_cu_per_sh = 8;
+			rdev->config.cik.max_backends_per_se = 2;
+		} else if ((rdev->pdev->device == 0x1309) ||
+			   (rdev->pdev->device == 0x130A) ||
+			   (rdev->pdev->device == 0x130D) ||
+			   (rdev->pdev->device == 0x1313)) {
+			rdev->config.cik.max_cu_per_sh = 6;
+			rdev->config.cik.max_backends_per_se = 2;
+		} else if ((rdev->pdev->device == 0x1306) ||
+			   (rdev->pdev->device == 0x1307) ||
+			   (rdev->pdev->device == 0x130B) ||
+			   (rdev->pdev->device == 0x130E) ||
+			   (rdev->pdev->device == 0x1315) ||
+			   (rdev->pdev->device == 0x131B)) {
+			rdev->config.cik.max_cu_per_sh = 4;
+			rdev->config.cik.max_backends_per_se = 1;
+		} else {
+			rdev->config.cik.max_cu_per_sh = 3;
+			rdev->config.cik.max_backends_per_se = 1;
+		}
+		rdev->config.cik.max_sh_per_se = 1;
+		rdev->config.cik.max_texture_channel_caches = 4;
+		rdev->config.cik.max_gprs = 256;
+		rdev->config.cik.max_gs_threads = 16;
+		rdev->config.cik.max_hw_contexts = 8;
+
+		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
+		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
+		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
+		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
+		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
 		break;
 	case CHIP_KABINI:
 	default:
@@ -2535,8 +3402,8 @@
 	/* ring 0 - compute and gfx */
 	/* Set ring buffer size */
 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	rb_bufsz = drm_order(ring->ring_size / 8);
-	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+	rb_bufsz = order_base_2(ring->ring_size / 8);
+	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
 #endif
@@ -2587,11 +3454,12 @@
 	if (rdev->wb.enabled) {
 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
 	} else {
+		mutex_lock(&rdev->srbm_mutex);
 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
 		rptr = RREG32(CP_HQD_PQ_RPTR);
 		cik_srbm_select(rdev, 0, 0, 0, 0);
+		mutex_unlock(&rdev->srbm_mutex);
 	}
-	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
 
 	return rptr;
 }
@@ -2604,11 +3472,12 @@
 	if (rdev->wb.enabled) {
 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
 	} else {
+		mutex_lock(&rdev->srbm_mutex);
 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
 		wptr = RREG32(CP_HQD_PQ_WPTR);
 		cik_srbm_select(rdev, 0, 0, 0, 0);
+		mutex_unlock(&rdev->srbm_mutex);
 	}
-	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
 
 	return wptr;
 }
@@ -2616,10 +3485,8 @@
 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
 			       struct radeon_ring *ring)
 {
-	u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
-
-	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
-	WDOORBELL32(ring->doorbell_offset, wptr);
+	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
+	WDOORBELL32(ring->doorbell_offset, ring->wptr);
 }
 
 /**
@@ -2897,6 +3764,7 @@
 	WREG32(CP_CPF_DEBUG, tmp);
 
 	/* init the pipes */
+	mutex_lock(&rdev->srbm_mutex);
 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
 		int me = (i < 4) ? 1 : 2;
 		int pipe = (i < 4) ? i : (i - 4);
@@ -2915,10 +3783,11 @@
 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 		tmp = RREG32(CP_HPD_EOP_CONTROL);
 		tmp &= ~EOP_SIZE_MASK;
-		tmp |= drm_order(MEC_HPD_SIZE / 8);
+		tmp |= order_base_2(MEC_HPD_SIZE / 8);
 		WREG32(CP_HPD_EOP_CONTROL, tmp);
 	}
 	cik_srbm_select(rdev, 0, 0, 0, 0);
+	mutex_unlock(&rdev->srbm_mutex);
 
 	/* init the queues.  Just two for now. */
 	for (i = 0; i < 2; i++) {
@@ -2972,6 +3841,7 @@
 		mqd->static_thread_mgmt23[0] = 0xffffffff;
 		mqd->static_thread_mgmt23[1] = 0xffffffff;
 
+		mutex_lock(&rdev->srbm_mutex);
 		cik_srbm_select(rdev, rdev->ring[idx].me,
 				rdev->ring[idx].pipe,
 				rdev->ring[idx].queue, 0);
@@ -3030,9 +3900,9 @@
 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
 
 		mqd->queue_state.cp_hqd_pq_control |=
-			drm_order(rdev->ring[idx].ring_size / 8);
+			order_base_2(rdev->ring[idx].ring_size / 8);
 		mqd->queue_state.cp_hqd_pq_control |=
-			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
+			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
 #ifdef __BIG_ENDIAN
 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
 #endif
@@ -3099,6 +3969,7 @@
 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
 
 		cik_srbm_select(rdev, 0, 0, 0, 0);
+		mutex_unlock(&rdev->srbm_mutex);
 
 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
@@ -3142,13 +4013,6 @@
 {
 	int r;
 
-	/* Reset all cp blocks */
-	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
-	RREG32(GRBM_SOFT_RESET);
-	mdelay(15);
-	WREG32(GRBM_SOFT_RESET, 0);
-	RREG32(GRBM_SOFT_RESET);
-
 	r = cik_cp_load_microcode(rdev);
 	if (r)
 		return r;
@@ -3163,579 +4027,6 @@
 	return 0;
 }
 
-/*
- * sDMA - System DMA
- * Starting with CIK, the GPU has new asynchronous
- * DMA engines.  These engines are used for compute
- * and gfx.  There are two DMA engines (SDMA0, SDMA1)
- * and each one supports 1 ring buffer used for gfx
- * and 2 queues used for compute.
- *
- * The programming model is very similar to the CP
- * (ring buffer, IBs, etc.), but sDMA has it's own
- * packet format that is different from the PM4 format
- * used by the CP. sDMA supports copying data, writing
- * embedded data, solid fills, and a number of other
- * things.  It also has support for tiling/detiling of
- * buffers.
- */
-/**
- * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
- *
- * @rdev: radeon_device pointer
- * @ib: IB object to schedule
- *
- * Schedule an IB in the DMA ring (CIK).
- */
-void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
-			      struct radeon_ib *ib)
-{
-	struct radeon_ring *ring = &rdev->ring[ib->ring];
-	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
-
-	if (rdev->wb.enabled) {
-		u32 next_rptr = ring->wptr + 5;
-		while ((next_rptr & 7) != 4)
-			next_rptr++;
-		next_rptr += 4;
-		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-		radeon_ring_write(ring, 1); /* number of DWs to follow */
-		radeon_ring_write(ring, next_rptr);
-	}
-
-	/* IB packet must end on a 8 DW boundary */
-	while ((ring->wptr & 7) != 4)
-		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
-	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
-	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
-	radeon_ring_write(ring, ib->length_dw);
-
-}
-
-/**
- * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
- *
- * @rdev: radeon_device pointer
- * @fence: radeon fence object
- *
- * Add a DMA fence packet to the ring to write
- * the fence seq number and DMA trap packet to generate
- * an interrupt if needed (CIK).
- */
-void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
-			      struct radeon_fence *fence)
-{
-	struct radeon_ring *ring = &rdev->ring[fence->ring];
-	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
-	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
-			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
-	u32 ref_and_mask;
-
-	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
-		ref_and_mask = SDMA0;
-	else
-		ref_and_mask = SDMA1;
-
-	/* write the fence */
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
-	radeon_ring_write(ring, addr & 0xffffffff);
-	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-	radeon_ring_write(ring, fence->seq);
-	/* generate an interrupt */
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
-	/* flush HDP */
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
-	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
-	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
-	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
-	radeon_ring_write(ring, ref_and_mask); /* MASK */
-	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
-}
-
-/**
- * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- * @semaphore: radeon semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (CIK).
- */
-void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
-				  struct radeon_ring *ring,
-				  struct radeon_semaphore *semaphore,
-				  bool emit_wait)
-{
-	u64 addr = semaphore->gpu_addr;
-	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
-
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
-	radeon_ring_write(ring, addr & 0xfffffff8);
-	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-}
-
-/**
- * cik_sdma_gfx_stop - stop the gfx async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the gfx async dma ring buffers (CIK).
- */
-static void cik_sdma_gfx_stop(struct radeon_device *rdev)
-{
-	u32 rb_cntl, reg_offset;
-	int i;
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-
-	for (i = 0; i < 2; i++) {
-		if (i == 0)
-			reg_offset = SDMA0_REGISTER_OFFSET;
-		else
-			reg_offset = SDMA1_REGISTER_OFFSET;
-		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
-		rb_cntl &= ~SDMA_RB_ENABLE;
-		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
-		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
-	}
-}
-
-/**
- * cik_sdma_rlc_stop - stop the compute async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the compute async dma queues (CIK).
- */
-static void cik_sdma_rlc_stop(struct radeon_device *rdev)
-{
-	/* XXX todo */
-}
-
-/**
- * cik_sdma_enable - stop the async dma engines
- *
- * @rdev: radeon_device pointer
- * @enable: enable/disable the DMA MEs.
- *
- * Halt or unhalt the async dma engines (CIK).
- */
-static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
-{
-	u32 me_cntl, reg_offset;
-	int i;
-
-	for (i = 0; i < 2; i++) {
-		if (i == 0)
-			reg_offset = SDMA0_REGISTER_OFFSET;
-		else
-			reg_offset = SDMA1_REGISTER_OFFSET;
-		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
-		if (enable)
-			me_cntl &= ~SDMA_HALT;
-		else
-			me_cntl |= SDMA_HALT;
-		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
-	}
-}
-
-/**
- * cik_sdma_gfx_resume - setup and start the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Set up the gfx DMA ring buffers and enable them (CIK).
- * Returns 0 for success, error for failure.
- */
-static int cik_sdma_gfx_resume(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring;
-	u32 rb_cntl, ib_cntl;
-	u32 rb_bufsz;
-	u32 reg_offset, wb_offset;
-	int i, r;
-
-	for (i = 0; i < 2; i++) {
-		if (i == 0) {
-			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
-			reg_offset = SDMA0_REGISTER_OFFSET;
-			wb_offset = R600_WB_DMA_RPTR_OFFSET;
-		} else {
-			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
-			reg_offset = SDMA1_REGISTER_OFFSET;
-			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
-		}
-
-		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
-		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
-
-		/* Set ring buffer size in dwords */
-		rb_bufsz = drm_order(ring->ring_size / 4);
-		rb_cntl = rb_bufsz << 1;
-#ifdef __BIG_ENDIAN
-		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
-#endif
-		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
-
-		/* Initialize the ring buffer's read and write pointers */
-		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
-		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
-
-		/* set the wb address whether it's enabled or not */
-		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
-		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
-		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
-		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
-
-		if (rdev->wb.enabled)
-			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
-
-		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
-		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
-
-		ring->wptr = 0;
-		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
-
-		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
-
-		/* enable DMA RB */
-		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
-
-		ib_cntl = SDMA_IB_ENABLE;
-#ifdef __BIG_ENDIAN
-		ib_cntl |= SDMA_IB_SWAP_ENABLE;
-#endif
-		/* enable DMA IBs */
-		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
-
-		ring->ready = true;
-
-		r = radeon_ring_test(rdev, ring->idx, ring);
-		if (r) {
-			ring->ready = false;
-			return r;
-		}
-	}
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
-
-	return 0;
-}
-
-/**
- * cik_sdma_rlc_resume - setup and start the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Set up the compute DMA queues and enable them (CIK).
- * Returns 0 for success, error for failure.
- */
-static int cik_sdma_rlc_resume(struct radeon_device *rdev)
-{
-	/* XXX todo */
-	return 0;
-}
-
-/**
- * cik_sdma_load_microcode - load the sDMA ME ucode
- *
- * @rdev: radeon_device pointer
- *
- * Loads the sDMA0/1 ucode.
- * Returns 0 for success, -EINVAL if the ucode is not available.
- */
-static int cik_sdma_load_microcode(struct radeon_device *rdev)
-{
-	const __be32 *fw_data;
-	int i;
-
-	if (!rdev->sdma_fw)
-		return -EINVAL;
-
-	/* stop the gfx rings and rlc compute queues */
-	cik_sdma_gfx_stop(rdev);
-	cik_sdma_rlc_stop(rdev);
-
-	/* halt the MEs */
-	cik_sdma_enable(rdev, false);
-
-	/* sdma0 */
-	fw_data = (const __be32 *)rdev->sdma_fw->data;
-	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
-	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-
-	/* sdma1 */
-	fw_data = (const __be32 *)rdev->sdma_fw->data;
-	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
-	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
-		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
-	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
-
-	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
-	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
-	return 0;
-}
-
-/**
- * cik_sdma_resume - setup and start the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Set up the DMA engines and enable them (CIK).
- * Returns 0 for success, error for failure.
- */
-static int cik_sdma_resume(struct radeon_device *rdev)
-{
-	int r;
-
-	/* Reset dma */
-	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
-	RREG32(SRBM_SOFT_RESET);
-	udelay(50);
-	WREG32(SRBM_SOFT_RESET, 0);
-	RREG32(SRBM_SOFT_RESET);
-
-	r = cik_sdma_load_microcode(rdev);
-	if (r)
-		return r;
-
-	/* unhalt the MEs */
-	cik_sdma_enable(rdev, true);
-
-	/* start the gfx rings and rlc compute queues */
-	r = cik_sdma_gfx_resume(rdev);
-	if (r)
-		return r;
-	r = cik_sdma_rlc_resume(rdev);
-	if (r)
-		return r;
-
-	return 0;
-}
-
-/**
- * cik_sdma_fini - tear down the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the async dma engines and free the rings (CIK).
- */
-static void cik_sdma_fini(struct radeon_device *rdev)
-{
-	/* stop the gfx rings and rlc compute queues */
-	cik_sdma_gfx_stop(rdev);
-	cik_sdma_rlc_stop(rdev);
-	/* halt the MEs */
-	cik_sdma_enable(rdev, false);
-	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
-	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
-	/* XXX - compute dma queue tear down */
-}
-
-/**
- * cik_copy_dma - copy pages using the DMA engine
- *
- * @rdev: radeon_device pointer
- * @src_offset: src GPU address
- * @dst_offset: dst GPU address
- * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
- *
- * Copy GPU paging using the DMA engine (CIK).
- * Used by the radeon ttm implementation to move pages if
- * registered as the asic copy callback.
- */
-int cik_copy_dma(struct radeon_device *rdev,
-		 uint64_t src_offset, uint64_t dst_offset,
-		 unsigned num_gpu_pages,
-		 struct radeon_fence **fence)
-{
-	struct radeon_semaphore *sem = NULL;
-	int ring_index = rdev->asic->copy.dma_ring_index;
-	struct radeon_ring *ring = &rdev->ring[ring_index];
-	u32 size_in_bytes, cur_size_in_bytes;
-	int i, num_loops;
-	int r = 0;
-
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
-	}
-
-	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
-	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
-	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
-	}
-
-	if (radeon_fence_need_sync(*fence, ring->idx)) {
-		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-					    ring->idx);
-		radeon_fence_note_sync(*fence, ring->idx);
-	} else {
-		radeon_semaphore_free(rdev, &sem, NULL);
-	}
-
-	for (i = 0; i < num_loops; i++) {
-		cur_size_in_bytes = size_in_bytes;
-		if (cur_size_in_bytes > 0x1fffff)
-			cur_size_in_bytes = 0x1fffff;
-		size_in_bytes -= cur_size_in_bytes;
-		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
-		radeon_ring_write(ring, cur_size_in_bytes);
-		radeon_ring_write(ring, 0); /* src/dst endian swap */
-		radeon_ring_write(ring, src_offset & 0xffffffff);
-		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
-		radeon_ring_write(ring, dst_offset & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
-		src_offset += cur_size_in_bytes;
-		dst_offset += cur_size_in_bytes;
-	}
-
-	r = radeon_fence_emit(rdev, fence, ring->idx);
-	if (r) {
-		radeon_ring_unlock_undo(rdev, ring);
-		return r;
-	}
-
-	radeon_ring_unlock_commit(rdev, ring);
-	radeon_semaphore_free(rdev, &sem, *fence);
-
-	return r;
-}
-
-/**
- * cik_sdma_ring_test - simple async dma engine test
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Test the DMA engine by writing using it to write an
- * value to memory. (CIK).
- * Returns 0 for success, error for failure.
- */
-int cik_sdma_ring_test(struct radeon_device *rdev,
-		       struct radeon_ring *ring)
-{
-	unsigned i;
-	int r;
-	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
-	u32 tmp;
-
-	if (!ptr) {
-		DRM_ERROR("invalid vram scratch pointer\n");
-		return -EINVAL;
-	}
-
-	tmp = 0xCAFEDEAD;
-	writel(tmp, ptr);
-
-	r = radeon_ring_lock(rdev, ring, 4);
-	if (r) {
-		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		return r;
-	}
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
-	radeon_ring_write(ring, 1); /* number of DWs to follow */
-	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
-
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = readl(ptr);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-	} else {
-		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
-	return r;
-}
-
-/**
- * cik_sdma_ib_test - test an IB on the DMA engine
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Test a simple IB in the DMA ring (CIK).
- * Returns 0 on success, error on failure.
- */
-int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	struct radeon_ib ib;
-	unsigned i;
-	int r;
-	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
-	u32 tmp = 0;
-
-	if (!ptr) {
-		DRM_ERROR("invalid vram scratch pointer\n");
-		return -EINVAL;
-	}
-
-	tmp = 0xCAFEDEAD;
-	writel(tmp, ptr);
-
-	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
-	if (r) {
-		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
-		return r;
-	}
-
-	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
-	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
-	ib.ptr[3] = 1;
-	ib.ptr[4] = 0xDEADBEEF;
-	ib.length_dw = 5;
-
-	r = radeon_ib_schedule(rdev, &ib, NULL);
-	if (r) {
-		radeon_ib_free(rdev, &ib);
-		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
-		return r;
-	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
-		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
-		return r;
-	}
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = readl(ptr);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
-	} else {
-		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
-		r = -EINVAL;
-	}
-	radeon_ib_free(rdev, &ib);
-	return r;
-}
-
-
 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
 {
 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
@@ -3785,7 +4076,7 @@
  * mask to be used by cik_gpu_soft_reset().
  * Returns a mask of the blocks to be reset.
  */
-static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
+u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
 {
 	u32 reset_mask = 0;
 	u32 tmp;
@@ -4036,34 +4327,6 @@
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
-/**
- * cik_sdma_is_lockup - Check if the DMA engine is locked up
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Check if the async DMA engine is locked up (CIK).
- * Returns true if the engine appears to be locked up, false if not.
- */
-bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
-	u32 mask;
-
-	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
-		mask = RADEON_RESET_DMA;
-	else
-		mask = RADEON_RESET_DMA1;
-
-	if (!(reset_mask & mask)) {
-		radeon_ring_lockup_update(ring);
-		return false;
-	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
-	return radeon_ring_test_lockup(rdev, ring);
-}
-
 /* MC */
 /**
  * cik_mc_program - program the GPU memory controller
@@ -4320,6 +4583,7 @@
 
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
+	mutex_lock(&rdev->srbm_mutex);
 	for (i = 0; i < 16; i++) {
 		cik_srbm_select(rdev, 0, 0, 0, i);
 		/* CP and shaders */
@@ -4335,6 +4599,7 @@
 		/* XXX SDMA RLC - todo */
 	}
 	cik_srbm_select(rdev, 0, 0, 0, 0);
+	mutex_unlock(&rdev->srbm_mutex);
 
 	cik_pcie_gart_tlb_flush(rdev);
 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
@@ -4598,164 +4863,44 @@
 		}
 	} else {
 		/* DMA */
-		if (flags & RADEON_VM_PAGE_SYSTEM) {
-			while (count) {
-				ndw = count * 2;
-				if (ndw > 0xFFFFE)
-					ndw = 0xFFFFE;
-
-				/* for non-physically contiguous pages (system) */
-				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
-				ib->ptr[ib->length_dw++] = pe;
-				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-				ib->ptr[ib->length_dw++] = ndw;
-				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-					if (flags & RADEON_VM_PAGE_SYSTEM) {
-						value = radeon_vm_map_gart(rdev, addr);
-						value &= 0xFFFFFFFFFFFFF000ULL;
-					} else if (flags & RADEON_VM_PAGE_VALID) {
-						value = addr;
-					} else {
-						value = 0;
-					}
-					addr += incr;
-					value |= r600_flags;
-					ib->ptr[ib->length_dw++] = value;
-					ib->ptr[ib->length_dw++] = upper_32_bits(value);
-				}
-			}
-		} else {
-			while (count) {
-				ndw = count;
-				if (ndw > 0x7FFFF)
-					ndw = 0x7FFFF;
-
-				if (flags & RADEON_VM_PAGE_VALID)
-					value = addr;
-				else
-					value = 0;
-				/* for physically contiguous pages (vram) */
-				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
-				ib->ptr[ib->length_dw++] = pe; /* dst addr */
-				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
-				ib->ptr[ib->length_dw++] = 0;
-				ib->ptr[ib->length_dw++] = value; /* value */
-				ib->ptr[ib->length_dw++] = upper_32_bits(value);
-				ib->ptr[ib->length_dw++] = incr; /* increment size */
-				ib->ptr[ib->length_dw++] = 0;
-				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-				pe += ndw * 8;
-				addr += ndw * incr;
-				count -= ndw;
-			}
-		}
-		while (ib->length_dw & 0x7)
-			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
 	}
 }
 
-/**
- * cik_dma_vm_flush - cik vm flush using sDMA
- *
- * @rdev: radeon_device pointer
- *
- * Update the page table base and flush the VM TLB
- * using sDMA (CIK).
- */
-void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
-{
-	struct radeon_ring *ring = &rdev->ring[ridx];
-	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
-			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
-	u32 ref_and_mask;
-
-	if (vm == NULL)
-		return;
-
-	if (ridx == R600_RING_TYPE_DMA_INDEX)
-		ref_and_mask = SDMA0;
-	else
-		ref_and_mask = SDMA1;
-
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	if (vm->id < 8) {
-		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
-	} else {
-		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
-	}
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
-
-	/* update SH_MEM_* regs */
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-	radeon_ring_write(ring, VMID(vm->id));
-
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	radeon_ring_write(ring, SH_MEM_BASES >> 2);
-	radeon_ring_write(ring, 0);
-
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
-	radeon_ring_write(ring, 0);
-
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
-	radeon_ring_write(ring, 1);
-
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
-	radeon_ring_write(ring, 0);
-
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
-	radeon_ring_write(ring, VMID(0));
-
-	/* flush HDP */
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
-	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
-	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
-	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
-	radeon_ring_write(ring, ref_and_mask); /* MASK */
-	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
-
-	/* flush TLB */
-	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
-	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
-	radeon_ring_write(ring, 1 << vm->id);
-}
-
 /*
  * RLC
  * The RLC is a multi-purpose microengine that handles a
  * variety of functions, the most important of which is
  * the interrupt controller.
  */
-/**
- * cik_rlc_stop - stop the RLC ME
- *
- * @rdev: radeon_device pointer
- *
- * Halt the RLC ME (MicroEngine) (CIK).
- */
-static void cik_rlc_stop(struct radeon_device *rdev)
+static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
+					  bool enable)
 {
-	int i, j, k;
-	u32 mask, tmp;
+	u32 tmp = RREG32(CP_INT_CNTL_RING0);
 
-	tmp = RREG32(CP_INT_CNTL_RING0);
-	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	if (enable)
+		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
+	else
+		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
 	WREG32(CP_INT_CNTL_RING0, tmp);
+}
 
-	RREG32(CB_CGTT_SCLK_CTRL);
-	RREG32(CB_CGTT_SCLK_CTRL);
-	RREG32(CB_CGTT_SCLK_CTRL);
-	RREG32(CB_CGTT_SCLK_CTRL);
+static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
+{
+	u32 tmp;
 
-	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
-	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
+	tmp = RREG32(RLC_LB_CNTL);
+	if (enable)
+		tmp |= LOAD_BALANCE_ENABLE;
+	else
+		tmp &= ~LOAD_BALANCE_ENABLE;
+	WREG32(RLC_LB_CNTL, tmp);
+}
 
-	WREG32(RLC_CNTL, 0);
+static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
+{
+	u32 i, j, k;
+	u32 mask;
 
 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
@@ -4777,6 +4922,84 @@
 	}
 }
 
+static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
+{
+	u32 tmp;
+
+	tmp = RREG32(RLC_CNTL);
+	if (tmp != rlc)
+		WREG32(RLC_CNTL, rlc);
+}
+
+static u32 cik_halt_rlc(struct radeon_device *rdev)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(RLC_CNTL);
+
+	if (data & RLC_ENABLE) {
+		u32 i;
+
+		data &= ~RLC_ENABLE;
+		WREG32(RLC_CNTL, data);
+
+		for (i = 0; i < rdev->usec_timeout; i++) {
+			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
+				break;
+			udelay(1);
+		}
+
+		cik_wait_for_rlc_serdes(rdev);
+	}
+
+	return orig;
+}
+
+void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
+{
+	u32 tmp, i, mask;
+
+	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
+	WREG32(RLC_GPR_REG2, tmp);
+
+	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
+			break;
+		udelay(1);
+	}
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
+			break;
+		udelay(1);
+	}
+}
+
+void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
+{
+	u32 tmp;
+
+	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
+	WREG32(RLC_GPR_REG2, tmp);
+}
+
+/**
+ * cik_rlc_stop - stop the RLC ME
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Halt the RLC ME (MicroEngine) (CIK).
+ */
+static void cik_rlc_stop(struct radeon_device *rdev)
+{
+	WREG32(RLC_CNTL, 0);
+
+	cik_enable_gui_idle_interrupt(rdev, false);
+
+	cik_wait_for_rlc_serdes(rdev);
+}
+
 /**
  * cik_rlc_start - start the RLC ME
  *
@@ -4786,13 +5009,9 @@
  */
 static void cik_rlc_start(struct radeon_device *rdev)
 {
-	u32 tmp;
-
 	WREG32(RLC_CNTL, RLC_ENABLE);
 
-	tmp = RREG32(CP_INT_CNTL_RING0);
-	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
-	WREG32(CP_INT_CNTL_RING0, tmp);
+	cik_enable_gui_idle_interrupt(rdev, true);
 
 	udelay(50);
 }
@@ -4808,8 +5027,7 @@
  */
 static int cik_rlc_resume(struct radeon_device *rdev)
 {
-	u32 i, size;
-	u32 clear_state_info[3];
+	u32 i, size, tmp;
 	const __be32 *fw_data;
 
 	if (!rdev->rlc_fw)
@@ -4830,12 +5048,15 @@
 
 	cik_rlc_stop(rdev);
 
-	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
-	RREG32(GRBM_SOFT_RESET);
-	udelay(50);
-	WREG32(GRBM_SOFT_RESET, 0);
-	RREG32(GRBM_SOFT_RESET);
-	udelay(50);
+	/* disable CG */
+	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
+	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
+
+	si_rlc_reset(rdev);
+
+	cik_init_pg(rdev);
+
+	cik_init_cg(rdev);
 
 	WREG32(RLC_LB_CNTR_INIT, 0);
 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
@@ -4854,20 +5075,757 @@
 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
 	WREG32(RLC_GPM_UCODE_ADDR, 0);
 
-	/* XXX */
-	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
-	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
-	clear_state_info[2] = 0;//cik_default_size;
-	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
-	for (i = 0; i < 3; i++)
-		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
-	WREG32(RLC_DRIVER_DMA_STATUS, 0);
+	/* XXX - find out what chips support lbpw */
+	cik_enable_lbpw(rdev, false);
+
+	if (rdev->family == CHIP_BONAIRE)
+		WREG32(RLC_DRIVER_DMA_STATUS, 0);
 
 	cik_rlc_start(rdev);
 
 	return 0;
 }
 
+static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
+{
+	u32 data, orig, tmp, tmp2;
+
+	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
+		cik_enable_gui_idle_interrupt(rdev, true);
+
+		tmp = cik_halt_rlc(rdev);
+
+		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
+		WREG32(RLC_SERDES_WR_CTRL, tmp2);
+
+		cik_update_rlc(rdev, tmp);
+
+		data |= CGCG_EN | CGLS_EN;
+	} else {
+		cik_enable_gui_idle_interrupt(rdev, false);
+
+		RREG32(CB_CGTT_SCLK_CTRL);
+		RREG32(CB_CGTT_SCLK_CTRL);
+		RREG32(CB_CGTT_SCLK_CTRL);
+		RREG32(CB_CGTT_SCLK_CTRL);
+
+		data &= ~(CGCG_EN | CGLS_EN);
+	}
+
+	if (orig != data)
+		WREG32(RLC_CGCG_CGLS_CTRL, data);
+
+}
+
+static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
+{
+	u32 data, orig, tmp = 0;
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
+		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
+			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
+				orig = data = RREG32(CP_MEM_SLP_CNTL);
+				data |= CP_MEM_LS_EN;
+				if (orig != data)
+					WREG32(CP_MEM_SLP_CNTL, data);
+			}
+		}
+
+		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+		data &= 0xfffffffd;
+		if (orig != data)
+			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+
+		tmp = cik_halt_rlc(rdev);
+
+		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
+		WREG32(RLC_SERDES_WR_CTRL, data);
+
+		cik_update_rlc(rdev, tmp);
+
+		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
+			orig = data = RREG32(CGTS_SM_CTRL_REG);
+			data &= ~SM_MODE_MASK;
+			data |= SM_MODE(0x2);
+			data |= SM_MODE_ENABLE;
+			data &= ~CGTS_OVERRIDE;
+			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
+			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
+				data &= ~CGTS_LS_OVERRIDE;
+			data &= ~ON_MONITOR_ADD_MASK;
+			data |= ON_MONITOR_ADD_EN;
+			data |= ON_MONITOR_ADD(0x96);
+			if (orig != data)
+				WREG32(CGTS_SM_CTRL_REG, data);
+		}
+	} else {
+		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
+		data |= 0x00000002;
+		if (orig != data)
+			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
+
+		data = RREG32(RLC_MEM_SLP_CNTL);
+		if (data & RLC_MEM_LS_EN) {
+			data &= ~RLC_MEM_LS_EN;
+			WREG32(RLC_MEM_SLP_CNTL, data);
+		}
+
+		data = RREG32(CP_MEM_SLP_CNTL);
+		if (data & CP_MEM_LS_EN) {
+			data &= ~CP_MEM_LS_EN;
+			WREG32(CP_MEM_SLP_CNTL, data);
+		}
+
+		orig = data = RREG32(CGTS_SM_CTRL_REG);
+		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
+		if (orig != data)
+			WREG32(CGTS_SM_CTRL_REG, data);
+
+		tmp = cik_halt_rlc(rdev);
+
+		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
+		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
+		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
+		WREG32(RLC_SERDES_WR_CTRL, data);
+
+		cik_update_rlc(rdev, tmp);
+	}
+}
+
+static const u32 mc_cg_registers[] =
+{
+	MC_HUB_MISC_HUB_CG,
+	MC_HUB_MISC_SIP_CG,
+	MC_HUB_MISC_VM_CG,
+	MC_XPB_CLK_GAT,
+	ATC_MISC_CG,
+	MC_CITF_MISC_WR_CG,
+	MC_CITF_MISC_RD_CG,
+	MC_CITF_MISC_VM_CG,
+	VM_L2_CG,
+};
+
+static void cik_enable_mc_ls(struct radeon_device *rdev,
+			     bool enable)
+{
+	int i;
+	u32 orig, data;
+
+	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+		orig = data = RREG32(mc_cg_registers[i]);
+		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
+			data |= MC_LS_ENABLE;
+		else
+			data &= ~MC_LS_ENABLE;
+		if (data != orig)
+			WREG32(mc_cg_registers[i], data);
+	}
+}
+
+static void cik_enable_mc_mgcg(struct radeon_device *rdev,
+			       bool enable)
+{
+	int i;
+	u32 orig, data;
+
+	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+		orig = data = RREG32(mc_cg_registers[i]);
+		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
+			data |= MC_CG_ENABLE;
+		else
+			data &= ~MC_CG_ENABLE;
+		if (data != orig)
+			WREG32(mc_cg_registers[i], data);
+	}
+}
+
+static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
+				 bool enable)
+{
+	u32 orig, data;
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
+		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
+		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
+	} else {
+		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
+		data |= 0xff000000;
+		if (data != orig)
+			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
+
+		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
+		data |= 0xff000000;
+		if (data != orig)
+			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
+	}
+}
+
+static void cik_enable_sdma_mgls(struct radeon_device *rdev,
+				 bool enable)
+{
+	u32 orig, data;
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
+		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+		data |= 0x100;
+		if (orig != data)
+			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+		data |= 0x100;
+		if (orig != data)
+			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+	} else {
+		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
+		data &= ~0x100;
+		if (orig != data)
+			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
+
+		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
+		data &= ~0x100;
+		if (orig != data)
+			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
+	}
+}
+
+static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
+				bool enable)
+{
+	u32 orig, data;
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
+		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+		data = 0xfff;
+		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
+
+		orig = data = RREG32(UVD_CGC_CTRL);
+		data |= DCM;
+		if (orig != data)
+			WREG32(UVD_CGC_CTRL, data);
+	} else {
+		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
+		data &= ~0xfff;
+		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
+
+		orig = data = RREG32(UVD_CGC_CTRL);
+		data &= ~DCM;
+		if (orig != data)
+			WREG32(UVD_CGC_CTRL, data);
+	}
+}
+
+static void cik_enable_bif_mgls(struct radeon_device *rdev,
+			       bool enable)
+{
+	u32 orig, data;
+
+	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
+		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
+			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
+	else
+		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
+			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
+
+	if (orig != data)
+		WREG32_PCIE_PORT(PCIE_CNTL2, data);
+}
+
+static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
+				bool enable)
+{
+	u32 orig, data;
+
+	orig = data = RREG32(HDP_HOST_PATH_CNTL);
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
+		data &= ~CLOCK_GATING_DIS;
+	else
+		data |= CLOCK_GATING_DIS;
+
+	if (orig != data)
+		WREG32(HDP_HOST_PATH_CNTL, data);
+}
+
+static void cik_enable_hdp_ls(struct radeon_device *rdev,
+			      bool enable)
+{
+	u32 orig, data;
+
+	orig = data = RREG32(HDP_MEM_POWER_LS);
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
+		data |= HDP_LS_ENABLE;
+	else
+		data &= ~HDP_LS_ENABLE;
+
+	if (orig != data)
+		WREG32(HDP_MEM_POWER_LS, data);
+}
+
+void cik_update_cg(struct radeon_device *rdev,
+		   u32 block, bool enable)
+{
+	if (block & RADEON_CG_BLOCK_GFX) {
+		/* order matters! */
+		if (enable) {
+			cik_enable_mgcg(rdev, true);
+			cik_enable_cgcg(rdev, true);
+		} else {
+			cik_enable_cgcg(rdev, false);
+			cik_enable_mgcg(rdev, false);
+		}
+	}
+
+	if (block & RADEON_CG_BLOCK_MC) {
+		if (!(rdev->flags & RADEON_IS_IGP)) {
+			cik_enable_mc_mgcg(rdev, enable);
+			cik_enable_mc_ls(rdev, enable);
+		}
+	}
+
+	if (block & RADEON_CG_BLOCK_SDMA) {
+		cik_enable_sdma_mgcg(rdev, enable);
+		cik_enable_sdma_mgls(rdev, enable);
+	}
+
+	if (block & RADEON_CG_BLOCK_BIF) {
+		cik_enable_bif_mgls(rdev, enable);
+	}
+
+	if (block & RADEON_CG_BLOCK_UVD) {
+		if (rdev->has_uvd)
+			cik_enable_uvd_mgcg(rdev, enable);
+	}
+
+	if (block & RADEON_CG_BLOCK_HDP) {
+		cik_enable_hdp_mgcg(rdev, enable);
+		cik_enable_hdp_ls(rdev, enable);
+	}
+}
+
+static void cik_init_cg(struct radeon_device *rdev)
+{
+
+	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
+
+	if (rdev->has_uvd)
+		si_init_uvd_internal_cg(rdev);
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_UVD |
+			     RADEON_CG_BLOCK_HDP), true);
+}
+
+static void cik_fini_cg(struct radeon_device *rdev)
+{
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_UVD |
+			     RADEON_CG_BLOCK_HDP), false);
+
+	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
+}
+
+static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
+					  bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(RLC_PG_CNTL);
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
+		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
+	else
+		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
+	if (orig != data)
+		WREG32(RLC_PG_CNTL, data);
+}
+
+static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
+					  bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(RLC_PG_CNTL);
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
+		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
+	else
+		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
+	if (orig != data)
+		WREG32(RLC_PG_CNTL, data);
+}
+
+static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(RLC_PG_CNTL);
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
+		data &= ~DISABLE_CP_PG;
+	else
+		data |= DISABLE_CP_PG;
+	if (orig != data)
+		WREG32(RLC_PG_CNTL, data);
+}
+
+static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(RLC_PG_CNTL);
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
+		data &= ~DISABLE_GDS_PG;
+	else
+		data |= DISABLE_GDS_PG;
+	if (orig != data)
+		WREG32(RLC_PG_CNTL, data);
+}
+
+#define CP_ME_TABLE_SIZE    96
+#define CP_ME_TABLE_OFFSET  2048
+#define CP_MEC_TABLE_OFFSET 4096
+
+void cik_init_cp_pg_table(struct radeon_device *rdev)
+{
+	const __be32 *fw_data;
+	volatile u32 *dst_ptr;
+	int me, i, max_me = 4;
+	u32 bo_offset = 0;
+	u32 table_offset;
+
+	if (rdev->family == CHIP_KAVERI)
+		max_me = 5;
+
+	if (rdev->rlc.cp_table_ptr == NULL)
+		return;
+
+	/* write the cp table buffer */
+	dst_ptr = rdev->rlc.cp_table_ptr;
+	for (me = 0; me < max_me; me++) {
+		if (me == 0) {
+			fw_data = (const __be32 *)rdev->ce_fw->data;
+			table_offset = CP_ME_TABLE_OFFSET;
+		} else if (me == 1) {
+			fw_data = (const __be32 *)rdev->pfp_fw->data;
+			table_offset = CP_ME_TABLE_OFFSET;
+		} else if (me == 2) {
+			fw_data = (const __be32 *)rdev->me_fw->data;
+			table_offset = CP_ME_TABLE_OFFSET;
+		} else {
+			fw_data = (const __be32 *)rdev->mec_fw->data;
+			table_offset = CP_MEC_TABLE_OFFSET;
+		}
+
+		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
+			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
+		}
+		bo_offset += CP_ME_TABLE_SIZE;
+	}
+}
+
+static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
+				bool enable)
+{
+	u32 data, orig;
+
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
+		orig = data = RREG32(RLC_PG_CNTL);
+		data |= GFX_PG_ENABLE;
+		if (orig != data)
+			WREG32(RLC_PG_CNTL, data);
+
+		orig = data = RREG32(RLC_AUTO_PG_CTRL);
+		data |= AUTO_PG_EN;
+		if (orig != data)
+			WREG32(RLC_AUTO_PG_CTRL, data);
+	} else {
+		orig = data = RREG32(RLC_PG_CNTL);
+		data &= ~GFX_PG_ENABLE;
+		if (orig != data)
+			WREG32(RLC_PG_CNTL, data);
+
+		orig = data = RREG32(RLC_AUTO_PG_CTRL);
+		data &= ~AUTO_PG_EN;
+		if (orig != data)
+			WREG32(RLC_AUTO_PG_CTRL, data);
+
+		data = RREG32(DB_RENDER_CONTROL);
+	}
+}
+
+static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
+{
+	u32 mask = 0, tmp, tmp1;
+	int i;
+
+	cik_select_se_sh(rdev, se, sh);
+	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
+	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
+	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
+
+	tmp &= 0xffff0000;
+
+	tmp |= tmp1;
+	tmp >>= 16;
+
+	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
+		mask <<= 1;
+		mask |= 1;
+	}
+
+	return (~tmp) & mask;
+}
+
+static void cik_init_ao_cu_mask(struct radeon_device *rdev)
+{
+	u32 i, j, k, active_cu_number = 0;
+	u32 mask, counter, cu_bitmap;
+	u32 tmp = 0;
+
+	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
+		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
+			mask = 1;
+			cu_bitmap = 0;
+			counter = 0;
+			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
+				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
+					if (counter < 2)
+						cu_bitmap |= mask;
+					counter ++;
+				}
+				mask <<= 1;
+			}
+
+			active_cu_number += counter;
+			tmp |= (cu_bitmap << (i * 16 + j * 8));
+		}
+	}
+
+	WREG32(RLC_PG_AO_CU_MASK, tmp);
+
+	tmp = RREG32(RLC_MAX_PG_CU);
+	tmp &= ~MAX_PU_CU_MASK;
+	tmp |= MAX_PU_CU(active_cu_number);
+	WREG32(RLC_MAX_PG_CU, tmp);
+}
+
+static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
+				       bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(RLC_PG_CNTL);
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
+		data |= STATIC_PER_CU_PG_ENABLE;
+	else
+		data &= ~STATIC_PER_CU_PG_ENABLE;
+	if (orig != data)
+		WREG32(RLC_PG_CNTL, data);
+}
+
+static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
+					bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(RLC_PG_CNTL);
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
+		data |= DYN_PER_CU_PG_ENABLE;
+	else
+		data &= ~DYN_PER_CU_PG_ENABLE;
+	if (orig != data)
+		WREG32(RLC_PG_CNTL, data);
+}
+
+#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
+#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
+
+static void cik_init_gfx_cgpg(struct radeon_device *rdev)
+{
+	u32 data, orig;
+	u32 i;
+
+	if (rdev->rlc.cs_data) {
+		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
+		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
+		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
+	} else {
+		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
+		for (i = 0; i < 3; i++)
+			WREG32(RLC_GPM_SCRATCH_DATA, 0);
+	}
+	if (rdev->rlc.reg_list) {
+		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
+		for (i = 0; i < rdev->rlc.reg_list_size; i++)
+			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
+	}
+
+	orig = data = RREG32(RLC_PG_CNTL);
+	data |= GFX_PG_SRC;
+	if (orig != data)
+		WREG32(RLC_PG_CNTL, data);
+
+	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
+	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
+
+	data = RREG32(CP_RB_WPTR_POLL_CNTL);
+	data &= ~IDLE_POLL_COUNT_MASK;
+	data |= IDLE_POLL_COUNT(0x60);
+	WREG32(CP_RB_WPTR_POLL_CNTL, data);
+
+	data = 0x10101010;
+	WREG32(RLC_PG_DELAY, data);
+
+	data = RREG32(RLC_PG_DELAY_2);
+	data &= ~0xff;
+	data |= 0x3;
+	WREG32(RLC_PG_DELAY_2, data);
+
+	data = RREG32(RLC_AUTO_PG_CTRL);
+	data &= ~GRBM_REG_SGIT_MASK;
+	data |= GRBM_REG_SGIT(0x700);
+	WREG32(RLC_AUTO_PG_CTRL, data);
+
+}
+
+static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
+{
+	cik_enable_gfx_cgpg(rdev, enable);
+	cik_enable_gfx_static_mgpg(rdev, enable);
+	cik_enable_gfx_dynamic_mgpg(rdev, enable);
+}
+
+u32 cik_get_csb_size(struct radeon_device *rdev)
+{
+	u32 count = 0;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	if (rdev->rlc.cs_data == NULL)
+		return 0;
+
+	/* begin clear state */
+	count += 2;
+	/* context control state */
+	count += 3;
+
+	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+		for (ext = sect->section; ext->extent != NULL; ++ext) {
+			if (sect->id == SECT_CONTEXT)
+				count += 2 + ext->reg_count;
+			else
+				return 0;
+		}
+	}
+	/* pa_sc_raster_config/pa_sc_raster_config1 */
+	count += 4;
+	/* end clear state */
+	count += 2;
+	/* clear state */
+	count += 2;
+
+	return count;
+}
+
+void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
+{
+	u32 count = 0, i;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	if (rdev->rlc.cs_data == NULL)
+		return;
+	if (buffer == NULL)
+		return;
+
+	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
+
+	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
+	buffer[count++] = 0x80000000;
+	buffer[count++] = 0x80000000;
+
+	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+		for (ext = sect->section; ext->extent != NULL; ++ext) {
+			if (sect->id == SECT_CONTEXT) {
+				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
+				buffer[count++] = ext->reg_index - 0xa000;
+				for (i = 0; i < ext->reg_count; i++)
+					buffer[count++] = ext->extent[i];
+			} else {
+				return;
+			}
+		}
+	}
+
+	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
+	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
+	switch (rdev->family) {
+	case CHIP_BONAIRE:
+		buffer[count++] = 0x16000012;
+		buffer[count++] = 0x00000000;
+		break;
+	case CHIP_KAVERI:
+		buffer[count++] = 0x00000000; /* XXX */
+		buffer[count++] = 0x00000000;
+		break;
+	case CHIP_KABINI:
+		buffer[count++] = 0x00000000; /* XXX */
+		buffer[count++] = 0x00000000;
+		break;
+	default:
+		buffer[count++] = 0x00000000;
+		buffer[count++] = 0x00000000;
+		break;
+	}
+
+	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
+
+	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
+	buffer[count++] = 0;
+}
+
+static void cik_init_pg(struct radeon_device *rdev)
+{
+	if (rdev->pg_flags) {
+		cik_enable_sck_slowdown_on_pu(rdev, true);
+		cik_enable_sck_slowdown_on_pd(rdev, true);
+		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+			cik_init_gfx_cgpg(rdev);
+			cik_enable_cp_pg(rdev, true);
+			cik_enable_gds_pg(rdev, true);
+		}
+		cik_init_ao_cu_mask(rdev);
+		cik_update_gfx_pg(rdev, true);
+	}
+}
+
+static void cik_fini_pg(struct radeon_device *rdev)
+{
+	if (rdev->pg_flags) {
+		cik_update_gfx_pg(rdev, false);
+		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+			cik_enable_cp_pg(rdev, false);
+			cik_enable_gds_pg(rdev, false);
+		}
+	}
+}
+
 /*
  * Interrupts
  * Starting with r6xx, interrupts are handled via a ring buffer.
@@ -5030,7 +5988,7 @@
 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
 
 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
-	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
+	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
 
 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
 		      IH_WPTR_OVERFLOW_CLEAR |
@@ -5086,6 +6044,7 @@
 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
 	u32 grbm_int_cntl = 0;
 	u32 dma_cntl, dma_cntl1;
+	u32 thermal_int;
 
 	if (!rdev->irq.installed) {
 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
@@ -5118,6 +6077,13 @@
 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
 
+	if (rdev->flags & RADEON_IS_IGP)
+		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
+			~(THERM_INTH_MASK | THERM_INTL_MASK);
+	else
+		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
+			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
+
 	/* enable CP interrupts on all rings */
 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
@@ -5275,6 +6241,14 @@
 		hpd6 |= DC_HPDx_INT_EN;
 	}
 
+	if (rdev->irq.dpm_thermal) {
+		DRM_DEBUG("dpm thermal\n");
+		if (rdev->flags & RADEON_IS_IGP)
+			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
+		else
+			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
+	}
+
 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
 
 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
@@ -5309,6 +6283,11 @@
 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
 
+	if (rdev->flags & RADEON_IS_IGP)
+		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
+	else
+		WREG32_SMC(CG_THERMAL_INT, thermal_int);
+
 	return 0;
 }
 
@@ -5520,6 +6499,7 @@
 	bool queue_hotplug = false;
 	bool queue_reset = false;
 	u32 addr, status, mc_client;
+	bool queue_thermal = false;
 
 	if (!rdev->ih.enabled || rdev->shutdown)
 		return IRQ_NONE;
@@ -5753,6 +6733,10 @@
 				break;
 			}
 			break;
+		case 124: /* UVD */
+			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
+			break;
 		case 146:
 		case 147:
 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
@@ -5870,6 +6854,19 @@
 				break;
 			}
 			break;
+		case 230: /* thermal low to high */
+			DRM_DEBUG("IH: thermal low to high\n");
+			rdev->pm.dpm.thermal.high_to_low = false;
+			queue_thermal = true;
+			break;
+		case 231: /* thermal high to low */
+			DRM_DEBUG("IH: thermal high to low\n");
+			rdev->pm.dpm.thermal.high_to_low = true;
+			queue_thermal = true;
+			break;
+		case 233: /* GUI IDLE */
+			DRM_DEBUG("IH: GUI idle\n");
+			break;
 		case 241: /* SDMA Privileged inst */
 		case 247: /* SDMA Privileged inst */
 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
@@ -5909,9 +6906,6 @@
 				break;
 			}
 			break;
-		case 233: /* GUI IDLE */
-			DRM_DEBUG("IH: GUI idle\n");
-			break;
 		default:
 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
 			break;
@@ -5925,6 +6919,8 @@
 		schedule_work(&rdev->hotplug_work);
 	if (queue_reset)
 		schedule_work(&rdev->reset_work);
+	if (queue_thermal)
+		schedule_work(&rdev->pm.dpm.thermal.work);
 	rdev->ih.rptr = rptr;
 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
 	atomic_set(&rdev->ih.lock, 0);
@@ -5954,6 +6950,18 @@
 	struct radeon_ring *ring;
 	int r;
 
+	/* enable pcie gen2/3 link */
+	cik_pcie_gen3_enable(rdev);
+	/* enable aspm */
+	cik_program_aspm(rdev);
+
+	/* scratch needs to be initialized before MC */
+	r = r600_vram_scratch_init(rdev);
+	if (r)
+		return r;
+
+	cik_mc_program(rdev);
+
 	if (rdev->flags & RADEON_IS_IGP) {
 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
@@ -5981,18 +6989,26 @@
 		}
 	}
 
-	r = r600_vram_scratch_init(rdev);
-	if (r)
-		return r;
-
-	cik_mc_program(rdev);
 	r = cik_pcie_gart_enable(rdev);
 	if (r)
 		return r;
 	cik_gpu_init(rdev);
 
 	/* allocate rlc buffers */
-	r = si_rlc_init(rdev);
+	if (rdev->flags & RADEON_IS_IGP) {
+		if (rdev->family == CHIP_KAVERI) {
+			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
+			rdev->rlc.reg_list_size =
+				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
+		} else {
+			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
+			rdev->rlc.reg_list_size =
+				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
+		}
+	}
+	rdev->rlc.cs_data = ci_cs_data;
+	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+	r = sumo_rlc_init(rdev);
 	if (r) {
 		DRM_ERROR("Failed to init rlc BOs!\n");
 		return r;
@@ -6040,12 +7056,15 @@
 		return r;
 	}
 
-	r = cik_uvd_resume(rdev);
+	r = radeon_uvd_resume(rdev);
 	if (!r) {
-		r = radeon_fence_driver_start_ring(rdev,
-						   R600_RING_TYPE_UVD_INDEX);
-		if (r)
-			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+		r = uvd_v4_2_resume(rdev);
+		if (!r) {
+			r = radeon_fence_driver_start_ring(rdev,
+							   R600_RING_TYPE_UVD_INDEX);
+			if (r)
+				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+		}
 	}
 	if (r)
 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
@@ -6068,7 +7087,7 @@
 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     CP_RB0_RPTR, CP_RB0_WPTR,
-			     0, 0xfffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r)
 		return r;
 
@@ -6077,7 +7096,7 @@
 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
-			     0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
+			     PACKET3(PACKET3_NOP, 0x3FFF));
 	if (r)
 		return r;
 	ring->me = 1; /* first MEC */
@@ -6089,7 +7108,7 @@
 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
-			     0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
+			     PACKET3(PACKET3_NOP, 0x3FFF));
 	if (r)
 		return r;
 	/* dGPU only have 1 MEC */
@@ -6102,7 +7121,7 @@
 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
-			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
 	if (r)
 		return r;
 
@@ -6110,7 +7129,7 @@
 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
-			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
 	if (r)
 		return r;
 
@@ -6124,12 +7143,11 @@
 
 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
 	if (ring->ring_size) {
-		r = radeon_ring_init(rdev, ring, ring->ring_size,
-				     R600_WB_UVD_RPTR_OFFSET,
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-				     0, 0xfffff, RADEON_CP_PACKET2);
+				     RADEON_CP_PACKET2);
 		if (!r)
-			r = r600_uvd_init(rdev);
+			r = uvd_v1_0_init(rdev);
 		if (r)
 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
 	}
@@ -6146,6 +7164,10 @@
 		return r;
 	}
 
+	r = dce6_audio_init(rdev);
+	if (r)
+		return r;
+
 	return 0;
 }
 
@@ -6191,11 +7213,14 @@
  */
 int cik_suspend(struct radeon_device *rdev)
 {
+	dce6_audio_fini(rdev);
 	radeon_vm_manager_fini(rdev);
 	cik_cp_enable(rdev, false);
 	cik_sdma_enable(rdev, false);
-	r600_uvd_rbc_stop(rdev);
+	uvd_v1_0_fini(rdev);
 	radeon_uvd_suspend(rdev);
+	cik_fini_pg(rdev);
+	cik_fini_cg(rdev);
 	cik_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cik_pcie_gart_disable(rdev);
@@ -6316,7 +7341,7 @@
 		cik_cp_fini(rdev);
 		cik_sdma_fini(rdev);
 		cik_irq_fini(rdev);
-		si_rlc_fini(rdev);
+		sumo_rlc_fini(rdev);
 		cik_mec_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
@@ -6351,13 +7376,16 @@
 {
 	cik_cp_fini(rdev);
 	cik_sdma_fini(rdev);
+	cik_fini_pg(rdev);
+	cik_fini_cg(rdev);
 	cik_irq_fini(rdev);
-	si_rlc_fini(rdev);
+	sumo_rlc_fini(rdev);
 	cik_mec_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_vm_manager_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
+	uvd_v1_0_fini(rdev);
 	radeon_uvd_fini(rdev);
 	cik_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
@@ -6386,8 +7414,8 @@
 				   struct radeon_crtc *radeon_crtc,
 				   struct drm_display_mode *mode)
 {
-	u32 tmp;
-
+	u32 tmp, buffer_alloc, i;
+	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
 	/*
 	 * Line Buffer Setup
 	 * There are 6 line buffers, one for each display controllers.
@@ -6397,22 +7425,37 @@
 	 * them using the stereo blender.
 	 */
 	if (radeon_crtc->base.enabled && mode) {
-		if (mode->crtc_hdisplay < 1920)
+		if (mode->crtc_hdisplay < 1920) {
 			tmp = 1;
-		else if (mode->crtc_hdisplay < 2560)
+			buffer_alloc = 2;
+		} else if (mode->crtc_hdisplay < 2560) {
 			tmp = 2;
-		else if (mode->crtc_hdisplay < 4096)
+			buffer_alloc = 2;
+		} else if (mode->crtc_hdisplay < 4096) {
 			tmp = 0;
-		else {
+			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
+		} else {
 			DRM_DEBUG_KMS("Mode too big for LB!\n");
 			tmp = 0;
+			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
 		}
-	} else
+	} else {
 		tmp = 1;
+		buffer_alloc = 0;
+	}
 
 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
 
+	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
+			break;
+		udelay(1);
+	}
+
 	if (radeon_crtc->base.enabled && mode) {
 		switch (tmp) {
 		case 0:
@@ -6814,7 +7857,7 @@
 				    u32 lb_size, u32 num_heads)
 {
 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
-	struct dce8_wm_params wm;
+	struct dce8_wm_params wm_low, wm_high;
 	u32 pixel_period;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
@@ -6824,35 +7867,82 @@
 		pixel_period = 1000000 / (u32)mode->clock;
 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
 
-		wm.yclk = rdev->pm.current_mclk * 10;
-		wm.sclk = rdev->pm.current_sclk * 10;
-		wm.disp_clk = mode->clock;
-		wm.src_width = mode->crtc_hdisplay;
-		wm.active_time = mode->crtc_hdisplay * pixel_period;
-		wm.blank_time = line_time - wm.active_time;
-		wm.interlaced = false;
+		/* watermark for high clocks */
+		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
+		    rdev->pm.dpm_enabled) {
+			wm_high.yclk =
+				radeon_dpm_get_mclk(rdev, false) * 10;
+			wm_high.sclk =
+				radeon_dpm_get_sclk(rdev, false) * 10;
+		} else {
+			wm_high.yclk = rdev->pm.current_mclk * 10;
+			wm_high.sclk = rdev->pm.current_sclk * 10;
+		}
+
+		wm_high.disp_clk = mode->clock;
+		wm_high.src_width = mode->crtc_hdisplay;
+		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.blank_time = line_time - wm_high.active_time;
+		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-			wm.interlaced = true;
-		wm.vsc = radeon_crtc->vsc;
-		wm.vtaps = 1;
+			wm_high.interlaced = true;
+		wm_high.vsc = radeon_crtc->vsc;
+		wm_high.vtaps = 1;
 		if (radeon_crtc->rmx_type != RMX_OFF)
-			wm.vtaps = 2;
-		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
-		wm.lb_size = lb_size;
-		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
-		wm.num_heads = num_heads;
+			wm_high.vtaps = 2;
+		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
+		wm_high.lb_size = lb_size;
+		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
+		wm_high.num_heads = num_heads;
 
 		/* set for high clocks */
-		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
-		/* set for low clocks */
-		/* wm.yclk = low clk; wm.sclk = low clk */
-		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
+		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
 
 		/* possibly force display priority to high */
 		/* should really do this at mode validation time... */
-		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
-		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
-		    !dce8_check_latency_hiding(&wm) ||
+		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
+		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
+		    !dce8_check_latency_hiding(&wm_high) ||
+		    (rdev->disp_priority == 2)) {
+			DRM_DEBUG_KMS("force priority to high\n");
+		}
+
+		/* watermark for low clocks */
+		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
+		    rdev->pm.dpm_enabled) {
+			wm_low.yclk =
+				radeon_dpm_get_mclk(rdev, true) * 10;
+			wm_low.sclk =
+				radeon_dpm_get_sclk(rdev, true) * 10;
+		} else {
+			wm_low.yclk = rdev->pm.current_mclk * 10;
+			wm_low.sclk = rdev->pm.current_sclk * 10;
+		}
+
+		wm_low.disp_clk = mode->clock;
+		wm_low.src_width = mode->crtc_hdisplay;
+		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.blank_time = line_time - wm_low.active_time;
+		wm_low.interlaced = false;
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			wm_low.interlaced = true;
+		wm_low.vsc = radeon_crtc->vsc;
+		wm_low.vtaps = 1;
+		if (radeon_crtc->rmx_type != RMX_OFF)
+			wm_low.vtaps = 2;
+		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
+		wm_low.lb_size = lb_size;
+		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
+		wm_low.num_heads = num_heads;
+
+		/* set for low clocks */
+		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
+
+		/* possibly force display priority to high */
+		/* should really do this at mode validation time... */
+		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
+		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
+		    !dce8_check_latency_hiding(&wm_low) ||
 		    (rdev->disp_priority == 2)) {
 			DRM_DEBUG_KMS("force priority to high\n");
 		}
@@ -6877,6 +7967,11 @@
 		LATENCY_HIGH_WATERMARK(line_time)));
 	/* restore original selection */
 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
+
+	/* save values for DPM */
+	radeon_crtc->line_time = line_time;
+	radeon_crtc->wm_high = latency_watermark_a;
+	radeon_crtc->wm_low = latency_watermark_b;
 }
 
 /**
@@ -6966,39 +8061,307 @@
 	return r;
 }
 
-int cik_uvd_resume(struct radeon_device *rdev)
+static void cik_pcie_gen3_enable(struct radeon_device *rdev)
 {
-	uint64_t addr;
-	uint32_t size;
-	int r;
+	struct pci_dev *root = rdev->pdev->bus->self;
+	int bridge_pos, gpu_pos;
+	u32 speed_cntl, mask, current_data_rate;
+	int ret, i;
+	u16 tmp16;
 
-	r = radeon_uvd_resume(rdev);
-	if (r)
-		return r;
+	if (radeon_pcie_gen2 == 0)
+		return;
 
-	/* programm the VCPU memory controller bits 0-27 */
-	addr = rdev->uvd.gpu_addr >> 3;
-	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3;
-	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
-	WREG32(UVD_VCPU_CACHE_SIZE0, size);
+	if (rdev->flags & RADEON_IS_IGP)
+		return;
 
-	addr += size;
-	size = RADEON_UVD_STACK_SIZE >> 3;
-	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
-	WREG32(UVD_VCPU_CACHE_SIZE1, size);
+	if (!(rdev->flags & RADEON_IS_PCIE))
+		return;
 
-	addr += size;
-	size = RADEON_UVD_HEAP_SIZE >> 3;
-	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
-	WREG32(UVD_VCPU_CACHE_SIZE2, size);
+	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+	if (ret != 0)
+		return;
 
-	/* bits 28-31 */
-	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
-	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+		return;
 
-	/* bits 32-39 */
-	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
-	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
+		LC_CURRENT_DATA_RATE_SHIFT;
+	if (mask & DRM_PCIE_SPEED_80) {
+		if (current_data_rate == 2) {
+			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
+			return;
+		}
+		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
+	} else if (mask & DRM_PCIE_SPEED_50) {
+		if (current_data_rate == 1) {
+			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
+			return;
+		}
+		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
+	}
 
-	return 0;
+	bridge_pos = pci_pcie_cap(root);
+	if (!bridge_pos)
+		return;
+
+	gpu_pos = pci_pcie_cap(rdev->pdev);
+	if (!gpu_pos)
+		return;
+
+	if (mask & DRM_PCIE_SPEED_80) {
+		/* re-try equalization if gen3 is not already enabled */
+		if (current_data_rate != 2) {
+			u16 bridge_cfg, gpu_cfg;
+			u16 bridge_cfg2, gpu_cfg2;
+			u32 max_lw, current_lw, tmp;
+
+			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
+			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
+			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
+			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
+
+			if (current_lw < max_lw) {
+				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+				if (tmp & LC_RENEGOTIATION_SUPPORT) {
+					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
+					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
+					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
+					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
+				}
+			}
+
+			for (i = 0; i < 10; i++) {
+				/* check status */
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
+					break;
+
+				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+
+				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+
+				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+				tmp |= LC_SET_QUIESCE;
+				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+				tmp |= LC_REDO_EQ;
+				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+
+				mdelay(100);
+
+				/* linkctl */
+				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
+				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
+				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
+				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+
+				/* linkctl2 */
+				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
+				tmp16 &= ~((1 << 4) | (7 << 9));
+				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
+				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
+
+				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+				tmp16 &= ~((1 << 4) | (7 << 9));
+				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
+				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
+				tmp &= ~LC_SET_QUIESCE;
+				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
+			}
+		}
+	}
+
+	/* set the link speed */
+	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
+	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
+	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
+	tmp16 &= ~0xf;
+	if (mask & DRM_PCIE_SPEED_80)
+		tmp16 |= 3; /* gen3 */
+	else if (mask & DRM_PCIE_SPEED_50)
+		tmp16 |= 2; /* gen2 */
+	else
+		tmp16 |= 1; /* gen1 */
+	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+
+	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
+	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
+		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
+			break;
+		udelay(1);
+	}
+}
+
+static void cik_program_aspm(struct radeon_device *rdev)
+{
+	u32 data, orig;
+	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
+	bool disable_clkreq = false;
+
+	if (radeon_aspm == 0)
+		return;
+
+	/* XXX double check IGPs */
+	if (rdev->flags & RADEON_IS_IGP)
+		return;
+
+	if (!(rdev->flags & RADEON_IS_PCIE))
+		return;
+
+	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+	data &= ~LC_XMIT_N_FTS_MASK;
+	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
+	if (orig != data)
+		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
+
+	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
+	data |= LC_GO_TO_RECOVERY;
+	if (orig != data)
+		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
+
+	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
+	data |= P_IGNORE_EDB_ERR;
+	if (orig != data)
+		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
+
+	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
+	data |= LC_PMI_TO_L1_DIS;
+	if (!disable_l0s)
+		data |= LC_L0S_INACTIVITY(7);
+
+	if (!disable_l1) {
+		data |= LC_L1_INACTIVITY(7);
+		data &= ~LC_PMI_TO_L1_DIS;
+		if (orig != data)
+			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+
+		if (!disable_plloff_in_l1) {
+			bool clk_req_support;
+
+			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
+			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+			if (orig != data)
+				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
+
+			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
+			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+			if (orig != data)
+				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
+
+			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
+			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
+			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
+			if (orig != data)
+				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
+
+			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
+			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
+			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
+			if (orig != data)
+				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
+
+			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
+			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
+			data |= LC_DYN_LANES_PWR_STATE(3);
+			if (orig != data)
+				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
+
+			if (!disable_clkreq) {
+				struct pci_dev *root = rdev->pdev->bus->self;
+				u32 lnkcap;
+
+				clk_req_support = false;
+				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
+				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
+					clk_req_support = true;
+			} else {
+				clk_req_support = false;
+			}
+
+			if (clk_req_support) {
+				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
+				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
+				if (orig != data)
+					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
+
+				orig = data = RREG32_SMC(THM_CLK_CNTL);
+				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
+				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
+				if (orig != data)
+					WREG32_SMC(THM_CLK_CNTL, data);
+
+				orig = data = RREG32_SMC(MISC_CLK_CTRL);
+				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
+				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
+				if (orig != data)
+					WREG32_SMC(MISC_CLK_CTRL, data);
+
+				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
+				data &= ~BCLK_AS_XCLK;
+				if (orig != data)
+					WREG32_SMC(CG_CLKPIN_CNTL, data);
+
+				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
+				data &= ~FORCE_BIF_REFCLK_EN;
+				if (orig != data)
+					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
+
+				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
+				data &= ~MPLL_CLKOUT_SEL_MASK;
+				data |= MPLL_CLKOUT_SEL(4);
+				if (orig != data)
+					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
+			}
+		}
+	} else {
+		if (orig != data)
+			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+	}
+
+	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
+	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
+	if (orig != data)
+		WREG32_PCIE_PORT(PCIE_CNTL2, data);
+
+	if (!disable_l0s) {
+		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
+		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
+			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
+			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
+				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
+				data &= ~LC_L0S_INACTIVITY_MASK;
+				if (orig != data)
+					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
+			}
+		}
+	}
 }
diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h
index d71e46d..ca1bb61 100644
--- a/drivers/gpu/drm/radeon/cik_reg.h
+++ b/drivers/gpu/drm/radeon/cik_reg.h
@@ -24,6 +24,9 @@
 #ifndef __CIK_REG_H__
 #define __CIK_REG_H__
 
+#define CIK_DIDT_IND_INDEX                        0xca00
+#define CIK_DIDT_IND_DATA                         0xca04
+
 #define CIK_DC_GPIO_HPD_MASK                      0x65b0
 #define CIK_DC_GPIO_HPD_A                         0x65b4
 #define CIK_DC_GPIO_HPD_EN                        0x65b8
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
new file mode 100644
index 0000000..b628606
--- /dev/null
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -0,0 +1,785 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "cikd.h"
+
+/* sdma */
+#define CIK_SDMA_UCODE_SIZE 1050
+#define CIK_SDMA_UCODE_VERSION 64
+
+u32 cik_gpu_check_soft_reset(struct radeon_device *rdev);
+
+/*
+ * sDMA - System DMA
+ * Starting with CIK, the GPU has new asynchronous
+ * DMA engines.  These engines are used for compute
+ * and gfx.  There are two DMA engines (SDMA0, SDMA1)
+ * and each one supports 1 ring buffer used for gfx
+ * and 2 queues used for compute.
+ *
+ * The programming model is very similar to the CP
+ * (ring buffer, IBs, etc.), but sDMA has it's own
+ * packet format that is different from the PM4 format
+ * used by the CP. sDMA supports copying data, writing
+ * embedded data, solid fills, and a number of other
+ * things.  It also has support for tiling/detiling of
+ * buffers.
+ */
+
+/**
+ * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (CIK).
+ */
+void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
+			      struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 5;
+		while ((next_rptr & 7) != 4)
+			next_rptr++;
+		next_rptr += 4;
+		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+		radeon_ring_write(ring, 1); /* number of DWs to follow */
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* IB packet must end on a 8 DW boundary */
+	while ((ring->wptr & 7) != 4)
+		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
+	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
+	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
+	radeon_ring_write(ring, ib->length_dw);
+
+}
+
+/**
+ * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (CIK).
+ */
+void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
+			      struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
+			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
+	u32 ref_and_mask;
+
+	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
+		ref_and_mask = SDMA0;
+	else
+		ref_and_mask = SDMA1;
+
+	/* write the fence */
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+	radeon_ring_write(ring, fence->seq);
+	/* generate an interrupt */
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
+	/* flush HDP */
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
+	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
+	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
+	radeon_ring_write(ring, ref_and_mask); /* MASK */
+	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
+}
+
+/**
+ * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @semaphore: radeon semaphore object
+ * @emit_wait: wait or signal semaphore
+ *
+ * Add a DMA semaphore packet to the ring wait on or signal
+ * other rings (CIK).
+ */
+void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
+				  struct radeon_ring *ring,
+				  struct radeon_semaphore *semaphore,
+				  bool emit_wait)
+{
+	u64 addr = semaphore->gpu_addr;
+	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
+
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
+	radeon_ring_write(ring, addr & 0xfffffff8);
+	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+}
+
+/**
+ * cik_sdma_gfx_stop - stop the gfx async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the gfx async dma ring buffers (CIK).
+ */
+static void cik_sdma_gfx_stop(struct radeon_device *rdev)
+{
+	u32 rb_cntl, reg_offset;
+	int i;
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0)
+			reg_offset = SDMA0_REGISTER_OFFSET;
+		else
+			reg_offset = SDMA1_REGISTER_OFFSET;
+		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
+		rb_cntl &= ~SDMA_RB_ENABLE;
+		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
+		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
+	}
+}
+
+/**
+ * cik_sdma_rlc_stop - stop the compute async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the compute async dma queues (CIK).
+ */
+static void cik_sdma_rlc_stop(struct radeon_device *rdev)
+{
+	/* XXX todo */
+}
+
+/**
+ * cik_sdma_enable - stop the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ * @enable: enable/disable the DMA MEs.
+ *
+ * Halt or unhalt the async dma engines (CIK).
+ */
+void cik_sdma_enable(struct radeon_device *rdev, bool enable)
+{
+	u32 me_cntl, reg_offset;
+	int i;
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0)
+			reg_offset = SDMA0_REGISTER_OFFSET;
+		else
+			reg_offset = SDMA1_REGISTER_OFFSET;
+		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
+		if (enable)
+			me_cntl &= ~SDMA_HALT;
+		else
+			me_cntl |= SDMA_HALT;
+		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
+	}
+}
+
+/**
+ * cik_sdma_gfx_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the gfx DMA ring buffers and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_gfx_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring;
+	u32 rb_cntl, ib_cntl;
+	u32 rb_bufsz;
+	u32 reg_offset, wb_offset;
+	int i, r;
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0) {
+			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+			reg_offset = SDMA0_REGISTER_OFFSET;
+			wb_offset = R600_WB_DMA_RPTR_OFFSET;
+		} else {
+			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+			reg_offset = SDMA1_REGISTER_OFFSET;
+			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+		}
+
+		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+
+		/* Set ring buffer size in dwords */
+		rb_bufsz = order_base_2(ring->ring_size / 4);
+		rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
+
+		/* Initialize the ring buffer's read and write pointers */
+		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
+		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
+
+		/* set the wb address whether it's enabled or not */
+		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
+		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
+		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+
+		if (rdev->wb.enabled)
+			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
+
+		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
+
+		ring->wptr = 0;
+		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
+
+		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
+
+		/* enable DMA RB */
+		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
+
+		ib_cntl = SDMA_IB_ENABLE;
+#ifdef __BIG_ENDIAN
+		ib_cntl |= SDMA_IB_SWAP_ENABLE;
+#endif
+		/* enable DMA IBs */
+		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
+
+		ring->ready = true;
+
+		r = radeon_ring_test(rdev, ring->idx, ring);
+		if (r) {
+			ring->ready = false;
+			return r;
+		}
+	}
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	return 0;
+}
+
+/**
+ * cik_sdma_rlc_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the compute DMA queues and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+static int cik_sdma_rlc_resume(struct radeon_device *rdev)
+{
+	/* XXX todo */
+	return 0;
+}
+
+/**
+ * cik_sdma_load_microcode - load the sDMA ME ucode
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Loads the sDMA0/1 ucode.
+ * Returns 0 for success, -EINVAL if the ucode is not available.
+ */
+static int cik_sdma_load_microcode(struct radeon_device *rdev)
+{
+	const __be32 *fw_data;
+	int i;
+
+	if (!rdev->sdma_fw)
+		return -EINVAL;
+
+	/* stop the gfx rings and rlc compute queues */
+	cik_sdma_gfx_stop(rdev);
+	cik_sdma_rlc_stop(rdev);
+
+	/* halt the MEs */
+	cik_sdma_enable(rdev, false);
+
+	/* sdma0 */
+	fw_data = (const __be32 *)rdev->sdma_fw->data;
+	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+
+	/* sdma1 */
+	fw_data = (const __be32 *)rdev->sdma_fw->data;
+	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
+		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
+	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
+
+	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
+	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
+	return 0;
+}
+
+/**
+ * cik_sdma_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA engines and enable them (CIK).
+ * Returns 0 for success, error for failure.
+ */
+int cik_sdma_resume(struct radeon_device *rdev)
+{
+	int r;
+
+	/* Reset dma */
+	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
+	RREG32(SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(SRBM_SOFT_RESET, 0);
+	RREG32(SRBM_SOFT_RESET);
+
+	r = cik_sdma_load_microcode(rdev);
+	if (r)
+		return r;
+
+	/* unhalt the MEs */
+	cik_sdma_enable(rdev, true);
+
+	/* start the gfx rings and rlc compute queues */
+	r = cik_sdma_gfx_resume(rdev);
+	if (r)
+		return r;
+	r = cik_sdma_rlc_resume(rdev);
+	if (r)
+		return r;
+
+	return 0;
+}
+
+/**
+ * cik_sdma_fini - tear down the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines and free the rings (CIK).
+ */
+void cik_sdma_fini(struct radeon_device *rdev)
+{
+	/* stop the gfx rings and rlc compute queues */
+	cik_sdma_gfx_stop(rdev);
+	cik_sdma_rlc_stop(rdev);
+	/* halt the MEs */
+	cik_sdma_enable(rdev, false);
+	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+	/* XXX - compute dma queue tear down */
+}
+
+/**
+ * cik_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (CIK).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int cik_copy_dma(struct radeon_device *rdev,
+		 uint64_t src_offset, uint64_t dst_offset,
+		 unsigned num_gpu_pages,
+		 struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_bytes, cur_size_in_bytes;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_bytes = size_in_bytes;
+		if (cur_size_in_bytes > 0x1fffff)
+			cur_size_in_bytes = 0x1fffff;
+		size_in_bytes -= cur_size_in_bytes;
+		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
+		radeon_ring_write(ring, cur_size_in_bytes);
+		radeon_ring_write(ring, 0); /* src/dst endian swap */
+		radeon_ring_write(ring, src_offset & 0xffffffff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
+		src_offset += cur_size_in_bytes;
+		dst_offset += cur_size_in_bytes;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
+/**
+ * cik_sdma_ring_test - simple async dma engine test
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (CIK).
+ * Returns 0 for success, error for failure.
+ */
+int cik_sdma_ring_test(struct radeon_device *rdev,
+		       struct radeon_ring *ring)
+{
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ring_lock(rdev, ring, 4);
+	if (r) {
+		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+		return r;
+	}
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
+	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
+	radeon_ring_write(ring, 1); /* number of DWs to follow */
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+	} else {
+		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+			  ring->idx, tmp);
+		r = -EINVAL;
+	}
+	return r;
+}
+
+/**
+ * cik_sdma_ib_test - test an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test a simple IB in the DMA ring (CIK).
+ * Returns 0 on success, error on failure.
+ */
+int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_ib ib;
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp = 0;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		return r;
+	}
+
+	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
+	ib.ptr[3] = 1;
+	ib.ptr[4] = 0xDEADBEEF;
+	ib.length_dw = 5;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		return r;
+	}
+	r = radeon_fence_wait(ib.fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+		r = -EINVAL;
+	}
+	radeon_ib_free(rdev, &ib);
+	return r;
+}
+
+/**
+ * cik_sdma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up (CIK).
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
+	u32 mask;
+
+	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+		mask = RADEON_RESET_DMA;
+	else
+		mask = RADEON_RESET_DMA1;
+
+	if (!(reset_mask & mask)) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
+/**
+ * cik_sdma_vm_set_page - update the page tables using sDMA
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using sDMA (CIK).
+ */
+void cik_sdma_vm_set_page(struct radeon_device *rdev,
+			  struct radeon_ib *ib,
+			  uint64_t pe,
+			  uint64_t addr, unsigned count,
+			  uint32_t incr, uint32_t flags)
+{
+	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+	uint64_t value;
+	unsigned ndw;
+
+	if (flags & RADEON_VM_PAGE_SYSTEM) {
+		while (count) {
+			ndw = count * 2;
+			if (ndw > 0xFFFFE)
+				ndw = 0xFFFFE;
+
+			/* for non-physically contiguous pages (system) */
+			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+			ib->ptr[ib->length_dw++] = pe;
+			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+			ib->ptr[ib->length_dw++] = ndw;
+			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+				if (flags & RADEON_VM_PAGE_SYSTEM) {
+					value = radeon_vm_map_gart(rdev, addr);
+					value &= 0xFFFFFFFFFFFFF000ULL;
+				} else if (flags & RADEON_VM_PAGE_VALID) {
+					value = addr;
+				} else {
+					value = 0;
+				}
+				addr += incr;
+				value |= r600_flags;
+				ib->ptr[ib->length_dw++] = value;
+				ib->ptr[ib->length_dw++] = upper_32_bits(value);
+			}
+		}
+	} else {
+		while (count) {
+			ndw = count;
+			if (ndw > 0x7FFFF)
+				ndw = 0x7FFFF;
+
+			if (flags & RADEON_VM_PAGE_VALID)
+				value = addr;
+			else
+				value = 0;
+			/* for physically contiguous pages (vram) */
+			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+			ib->ptr[ib->length_dw++] = pe; /* dst addr */
+			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+			ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+			ib->ptr[ib->length_dw++] = 0;
+			ib->ptr[ib->length_dw++] = value; /* value */
+			ib->ptr[ib->length_dw++] = upper_32_bits(value);
+			ib->ptr[ib->length_dw++] = incr; /* increment size */
+			ib->ptr[ib->length_dw++] = 0;
+			ib->ptr[ib->length_dw++] = ndw; /* number of entries */
+			pe += ndw * 8;
+			addr += ndw * incr;
+			count -= ndw;
+		}
+	}
+	while (ib->length_dw & 0x7)
+		ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
+}
+
+/**
+ * cik_dma_vm_flush - cik vm flush using sDMA
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Update the page table base and flush the VM TLB
+ * using sDMA (CIK).
+ */
+void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+	struct radeon_ring *ring = &rdev->ring[ridx];
+	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
+			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
+	u32 ref_and_mask;
+
+	if (vm == NULL)
+		return;
+
+	if (ridx == R600_RING_TYPE_DMA_INDEX)
+		ref_and_mask = SDMA0;
+	else
+		ref_and_mask = SDMA1;
+
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	if (vm->id < 8) {
+		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
+	} else {
+		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
+	}
+	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+	/* update SH_MEM_* regs */
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+	radeon_ring_write(ring, VMID(vm->id));
+
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	radeon_ring_write(ring, SH_MEM_BASES >> 2);
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
+	radeon_ring_write(ring, 1);
+
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
+	radeon_ring_write(ring, VMID(0));
+
+	/* flush HDP */
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
+	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
+	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
+	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
+	radeon_ring_write(ring, ref_and_mask); /* MASK */
+	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
+
+	/* flush TLB */
+	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
+	radeon_ring_write(ring, 1 << vm->id);
+}
+
diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h
index 7e9275e..203d2a0 100644
--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@@ -28,21 +28,375 @@
 
 #define CIK_RB_BITMAP_WIDTH_PER_SH  2
 
-/* SMC IND registers */
-#define GENERAL_PWRMGT                                    0xC0200000
-#       define GPU_COUNTER_CLK                            (1 << 15)
+/* DIDT IND registers */
+#define DIDT_SQ_CTRL0                                     0x0
+#       define DIDT_CTRL_EN                               (1 << 0)
+#define DIDT_DB_CTRL0                                     0x20
+#define DIDT_TD_CTRL0                                     0x40
+#define DIDT_TCP_CTRL0                                    0x60
 
+/* SMC IND registers */
+#define DPM_TABLE_475                                     0x3F768
+#       define SamuBootLevel(x)                           ((x) << 0)
+#       define SamuBootLevel_MASK                         0x000000ff
+#       define SamuBootLevel_SHIFT                        0
+#       define AcpBootLevel(x)                            ((x) << 8)
+#       define AcpBootLevel_MASK                          0x0000ff00
+#       define AcpBootLevel_SHIFT                         8
+#       define VceBootLevel(x)                            ((x) << 16)
+#       define VceBootLevel_MASK                          0x00ff0000
+#       define VceBootLevel_SHIFT                         16
+#       define UvdBootLevel(x)                            ((x) << 24)
+#       define UvdBootLevel_MASK                          0xff000000
+#       define UvdBootLevel_SHIFT                         24
+
+#define FIRMWARE_FLAGS                                    0x3F800
+#       define INTERRUPTS_ENABLED                         (1 << 0)
+
+#define NB_DPM_CONFIG_1                                   0x3F9E8
+#       define Dpm0PgNbPsLo(x)                            ((x) << 0)
+#       define Dpm0PgNbPsLo_MASK                          0x000000ff
+#       define Dpm0PgNbPsLo_SHIFT                         0
+#       define Dpm0PgNbPsHi(x)                            ((x) << 8)
+#       define Dpm0PgNbPsHi_MASK                          0x0000ff00
+#       define Dpm0PgNbPsHi_SHIFT                         8
+#       define DpmXNbPsLo(x)                              ((x) << 16)
+#       define DpmXNbPsLo_MASK                            0x00ff0000
+#       define DpmXNbPsLo_SHIFT                           16
+#       define DpmXNbPsHi(x)                              ((x) << 24)
+#       define DpmXNbPsHi_MASK                            0xff000000
+#       define DpmXNbPsHi_SHIFT                           24
+
+#define	SMC_SYSCON_RESET_CNTL				0x80000000
+#       define RST_REG                                  (1 << 0)
+#define	SMC_SYSCON_CLOCK_CNTL_0				0x80000004
+#       define CK_DISABLE                               (1 << 0)
+#       define CKEN                                     (1 << 24)
+
+#define	SMC_SYSCON_MISC_CNTL				0x80000010
+
+#define SMC_SYSCON_MSG_ARG_0                              0x80000068
+
+#define SMC_PC_C                                          0x80000370
+
+#define SMC_SCRATCH9                                      0x80000424
+
+#define RCU_UC_EVENTS                                     0xC0000004
+#       define BOOT_SEQ_DONE                              (1 << 7)
+
+#define GENERAL_PWRMGT                                    0xC0200000
+#       define GLOBAL_PWRMGT_EN                           (1 << 0)
+#       define STATIC_PM_EN                               (1 << 1)
+#       define THERMAL_PROTECTION_DIS                     (1 << 2)
+#       define THERMAL_PROTECTION_TYPE                    (1 << 3)
+#       define SW_SMIO_INDEX(x)                           ((x) << 6)
+#       define SW_SMIO_INDEX_MASK                         (1 << 6)
+#       define SW_SMIO_INDEX_SHIFT                        6
+#       define VOLT_PWRMGT_EN                             (1 << 10)
+#       define GPU_COUNTER_CLK                            (1 << 15)
+#       define DYN_SPREAD_SPECTRUM_EN                     (1 << 23)
+
+#define CNB_PWRMGT_CNTL                                   0xC0200004
+#       define GNB_SLOW_MODE(x)                           ((x) << 0)
+#       define GNB_SLOW_MODE_MASK                         (3 << 0)
+#       define GNB_SLOW_MODE_SHIFT                        0
+#       define GNB_SLOW                                   (1 << 2)
+#       define FORCE_NB_PS1                               (1 << 3)
+#       define DPM_ENABLED                                (1 << 4)
+
+#define SCLK_PWRMGT_CNTL                                  0xC0200008
+#       define SCLK_PWRMGT_OFF                            (1 << 0)
+#       define RESET_BUSY_CNT                             (1 << 4)
+#       define RESET_SCLK_CNT                             (1 << 5)
+#       define DYNAMIC_PM_EN                              (1 << 21)
+
+#define TARGET_AND_CURRENT_PROFILE_INDEX                  0xC0200014
+#       define CURRENT_STATE_MASK                         (0xf << 4)
+#       define CURRENT_STATE_SHIFT                        4
+#       define CURR_MCLK_INDEX_MASK                       (0xf << 8)
+#       define CURR_MCLK_INDEX_SHIFT                      8
+#       define CURR_SCLK_INDEX_MASK                       (0x1f << 16)
+#       define CURR_SCLK_INDEX_SHIFT                      16
+
+#define CG_SSP                                            0xC0200044
+#       define SST(x)                                     ((x) << 0)
+#       define SST_MASK                                   (0xffff << 0)
+#       define SSTU(x)                                    ((x) << 16)
+#       define SSTU_MASK                                  (0xf << 16)
+
+#define CG_DISPLAY_GAP_CNTL                               0xC0200060
+#       define DISP_GAP(x)                                ((x) << 0)
+#       define DISP_GAP_MASK                              (3 << 0)
+#       define VBI_TIMER_COUNT(x)                         ((x) << 4)
+#       define VBI_TIMER_COUNT_MASK                       (0x3fff << 4)
+#       define VBI_TIMER_UNIT(x)                          ((x) << 20)
+#       define VBI_TIMER_UNIT_MASK                        (7 << 20)
+#       define DISP_GAP_MCHG(x)                           ((x) << 24)
+#       define DISP_GAP_MCHG_MASK                         (3 << 24)
+
+#define SMU_VOLTAGE_STATUS                                0xC0200094
+#       define SMU_VOLTAGE_CURRENT_LEVEL_MASK             (0xff << 1)
+#       define SMU_VOLTAGE_CURRENT_LEVEL_SHIFT            1
+
+#define TARGET_AND_CURRENT_PROFILE_INDEX_1                0xC02000F0
+#       define CURR_PCIE_INDEX_MASK                       (0xf << 24)
+#       define CURR_PCIE_INDEX_SHIFT                      24
+
+#define CG_ULV_PARAMETER                                  0xC0200158
+
+#define CG_FTV_0                                          0xC02001A8
+#define CG_FTV_1                                          0xC02001AC
+#define CG_FTV_2                                          0xC02001B0
+#define CG_FTV_3                                          0xC02001B4
+#define CG_FTV_4                                          0xC02001B8
+#define CG_FTV_5                                          0xC02001BC
+#define CG_FTV_6                                          0xC02001C0
+#define CG_FTV_7                                          0xC02001C4
+
+#define CG_DISPLAY_GAP_CNTL2                              0xC0200230
+
+#define LCAC_SX0_OVR_SEL                                  0xC0400D04
+#define LCAC_SX0_OVR_VAL                                  0xC0400D08
+
+#define LCAC_MC0_CNTL                                     0xC0400D30
+#define LCAC_MC0_OVR_SEL                                  0xC0400D34
+#define LCAC_MC0_OVR_VAL                                  0xC0400D38
+#define LCAC_MC1_CNTL                                     0xC0400D3C
+#define LCAC_MC1_OVR_SEL                                  0xC0400D40
+#define LCAC_MC1_OVR_VAL                                  0xC0400D44
+
+#define LCAC_MC2_OVR_SEL                                  0xC0400D4C
+#define LCAC_MC2_OVR_VAL                                  0xC0400D50
+
+#define LCAC_MC3_OVR_SEL                                  0xC0400D58
+#define LCAC_MC3_OVR_VAL                                  0xC0400D5C
+
+#define LCAC_CPL_CNTL                                     0xC0400D80
+#define LCAC_CPL_OVR_SEL                                  0xC0400D84
+#define LCAC_CPL_OVR_VAL                                  0xC0400D88
+
+/* dGPU */
+#define	CG_THERMAL_CTRL					0xC0300004
+#define 	DPM_EVENT_SRC(x)			((x) << 0)
+#define 	DPM_EVENT_SRC_MASK			(7 << 0)
+#define		DIG_THERM_DPM(x)			((x) << 14)
+#define		DIG_THERM_DPM_MASK			0x003FC000
+#define		DIG_THERM_DPM_SHIFT			14
+
+#define	CG_THERMAL_INT					0xC030000C
+#define		CI_DIG_THERM_INTH(x)			((x) << 8)
+#define		CI_DIG_THERM_INTH_MASK			0x0000FF00
+#define		CI_DIG_THERM_INTH_SHIFT			8
+#define		CI_DIG_THERM_INTL(x)			((x) << 16)
+#define		CI_DIG_THERM_INTL_MASK			0x00FF0000
+#define		CI_DIG_THERM_INTL_SHIFT			16
+#define 	THERM_INT_MASK_HIGH			(1 << 24)
+#define 	THERM_INT_MASK_LOW			(1 << 25)
+
+#define	CG_MULT_THERMAL_STATUS				0xC0300014
+#define		ASIC_MAX_TEMP(x)			((x) << 0)
+#define		ASIC_MAX_TEMP_MASK			0x000001ff
+#define		ASIC_MAX_TEMP_SHIFT			0
+#define		CTF_TEMP(x)				((x) << 9)
+#define		CTF_TEMP_MASK				0x0003fe00
+#define		CTF_TEMP_SHIFT				9
+
+#define	CG_SPLL_FUNC_CNTL				0xC0500140
+#define		SPLL_RESET				(1 << 0)
+#define		SPLL_PWRON				(1 << 1)
+#define		SPLL_BYPASS_EN				(1 << 3)
+#define		SPLL_REF_DIV(x)				((x) << 5)
+#define		SPLL_REF_DIV_MASK			(0x3f << 5)
+#define		SPLL_PDIV_A(x)				((x) << 20)
+#define		SPLL_PDIV_A_MASK			(0x7f << 20)
+#define		SPLL_PDIV_A_SHIFT			20
+#define	CG_SPLL_FUNC_CNTL_2				0xC0500144
+#define		SCLK_MUX_SEL(x)				((x) << 0)
+#define		SCLK_MUX_SEL_MASK			(0x1ff << 0)
+#define	CG_SPLL_FUNC_CNTL_3				0xC0500148
+#define		SPLL_FB_DIV(x)				((x) << 0)
+#define		SPLL_FB_DIV_MASK			(0x3ffffff << 0)
+#define		SPLL_FB_DIV_SHIFT			0
+#define		SPLL_DITHEN				(1 << 28)
+#define	CG_SPLL_FUNC_CNTL_4				0xC050014C
+
+#define	CG_SPLL_SPREAD_SPECTRUM				0xC0500164
+#define		SSEN					(1 << 0)
+#define		CLK_S(x)				((x) << 4)
+#define		CLK_S_MASK				(0xfff << 4)
+#define		CLK_S_SHIFT				4
+#define	CG_SPLL_SPREAD_SPECTRUM_2			0xC0500168
+#define		CLK_V(x)				((x) << 0)
+#define		CLK_V_MASK				(0x3ffffff << 0)
+#define		CLK_V_SHIFT				0
+
+#define	MPLL_BYPASSCLK_SEL				0xC050019C
+#	define MPLL_CLKOUT_SEL(x)			((x) << 8)
+#	define MPLL_CLKOUT_SEL_MASK			0xFF00
 #define CG_CLKPIN_CNTL                                    0xC05001A0
 #       define XTALIN_DIVIDE                              (1 << 1)
+#       define BCLK_AS_XCLK                               (1 << 2)
+#define CG_CLKPIN_CNTL_2                                  0xC05001A4
+#       define FORCE_BIF_REFCLK_EN                        (1 << 3)
+#       define MUX_TCLK_TO_XCLK                           (1 << 8)
+#define	THM_CLK_CNTL					0xC05001A8
+#	define CMON_CLK_SEL(x)				((x) << 0)
+#	define CMON_CLK_SEL_MASK			0xFF
+#	define TMON_CLK_SEL(x)				((x) << 8)
+#	define TMON_CLK_SEL_MASK			0xFF00
+#define	MISC_CLK_CTRL					0xC05001AC
+#	define DEEP_SLEEP_CLK_SEL(x)			((x) << 0)
+#	define DEEP_SLEEP_CLK_SEL_MASK			0xFF
+#	define ZCLK_SEL(x)				((x) << 8)
+#	define ZCLK_SEL_MASK				0xFF00
 
+/* KV/KB */
+#define	CG_THERMAL_INT_CTRL				0xC2100028
+#define		DIG_THERM_INTH(x)			((x) << 0)
+#define		DIG_THERM_INTH_MASK			0x000000FF
+#define		DIG_THERM_INTH_SHIFT			0
+#define		DIG_THERM_INTL(x)			((x) << 8)
+#define		DIG_THERM_INTL_MASK			0x0000FF00
+#define		DIG_THERM_INTL_SHIFT			8
+#define 	THERM_INTH_MASK				(1 << 24)
+#define 	THERM_INTL_MASK				(1 << 25)
+
+/* PCIE registers idx/data 0x38/0x3c */
+#define PB0_PIF_PWRDOWN_0                                 0x1100012 /* PCIE */
+#       define PLL_POWER_STATE_IN_TXS2_0(x)               ((x) << 7)
+#       define PLL_POWER_STATE_IN_TXS2_0_MASK             (0x7 << 7)
+#       define PLL_POWER_STATE_IN_TXS2_0_SHIFT            7
+#       define PLL_POWER_STATE_IN_OFF_0(x)                ((x) << 10)
+#       define PLL_POWER_STATE_IN_OFF_0_MASK              (0x7 << 10)
+#       define PLL_POWER_STATE_IN_OFF_0_SHIFT             10
+#       define PLL_RAMP_UP_TIME_0(x)                      ((x) << 24)
+#       define PLL_RAMP_UP_TIME_0_MASK                    (0x7 << 24)
+#       define PLL_RAMP_UP_TIME_0_SHIFT                   24
+#define PB0_PIF_PWRDOWN_1                                 0x1100013 /* PCIE */
+#       define PLL_POWER_STATE_IN_TXS2_1(x)               ((x) << 7)
+#       define PLL_POWER_STATE_IN_TXS2_1_MASK             (0x7 << 7)
+#       define PLL_POWER_STATE_IN_TXS2_1_SHIFT            7
+#       define PLL_POWER_STATE_IN_OFF_1(x)                ((x) << 10)
+#       define PLL_POWER_STATE_IN_OFF_1_MASK              (0x7 << 10)
+#       define PLL_POWER_STATE_IN_OFF_1_SHIFT             10
+#       define PLL_RAMP_UP_TIME_1(x)                      ((x) << 24)
+#       define PLL_RAMP_UP_TIME_1_MASK                    (0x7 << 24)
+#       define PLL_RAMP_UP_TIME_1_SHIFT                   24
+
+#define PCIE_CNTL2                                        0x1001001c /* PCIE */
+#       define SLV_MEM_LS_EN                              (1 << 16)
+#       define SLV_MEM_AGGRESSIVE_LS_EN                   (1 << 17)
+#       define MST_MEM_LS_EN                              (1 << 18)
+#       define REPLAY_MEM_LS_EN                           (1 << 19)
+
+#define PCIE_LC_STATUS1                                   0x1400028 /* PCIE */
+#       define LC_REVERSE_RCVR                            (1 << 0)
+#       define LC_REVERSE_XMIT                            (1 << 1)
+#       define LC_OPERATING_LINK_WIDTH_MASK               (0x7 << 2)
+#       define LC_OPERATING_LINK_WIDTH_SHIFT              2
+#       define LC_DETECTED_LINK_WIDTH_MASK                (0x7 << 5)
+#       define LC_DETECTED_LINK_WIDTH_SHIFT               5
+
+#define PCIE_P_CNTL                                       0x1400040 /* PCIE */
+#       define P_IGNORE_EDB_ERR                           (1 << 6)
+
+#define PB1_PIF_PWRDOWN_0                                 0x2100012 /* PCIE */
+#define PB1_PIF_PWRDOWN_1                                 0x2100013 /* PCIE */
+
+#define PCIE_LC_CNTL                                      0x100100A0 /* PCIE */
+#       define LC_L0S_INACTIVITY(x)                       ((x) << 8)
+#       define LC_L0S_INACTIVITY_MASK                     (0xf << 8)
+#       define LC_L0S_INACTIVITY_SHIFT                    8
+#       define LC_L1_INACTIVITY(x)                        ((x) << 12)
+#       define LC_L1_INACTIVITY_MASK                      (0xf << 12)
+#       define LC_L1_INACTIVITY_SHIFT                     12
+#       define LC_PMI_TO_L1_DIS                           (1 << 16)
+#       define LC_ASPM_TO_L1_DIS                          (1 << 24)
+
+#define PCIE_LC_LINK_WIDTH_CNTL                           0x100100A2 /* PCIE */
+#       define LC_LINK_WIDTH_SHIFT                        0
+#       define LC_LINK_WIDTH_MASK                         0x7
+#       define LC_LINK_WIDTH_X0                           0
+#       define LC_LINK_WIDTH_X1                           1
+#       define LC_LINK_WIDTH_X2                           2
+#       define LC_LINK_WIDTH_X4                           3
+#       define LC_LINK_WIDTH_X8                           4
+#       define LC_LINK_WIDTH_X16                          6
+#       define LC_LINK_WIDTH_RD_SHIFT                     4
+#       define LC_LINK_WIDTH_RD_MASK                      0x70
+#       define LC_RECONFIG_ARC_MISSING_ESCAPE             (1 << 7)
+#       define LC_RECONFIG_NOW                            (1 << 8)
+#       define LC_RENEGOTIATION_SUPPORT                   (1 << 9)
+#       define LC_RENEGOTIATE_EN                          (1 << 10)
+#       define LC_SHORT_RECONFIG_EN                       (1 << 11)
+#       define LC_UPCONFIGURE_SUPPORT                     (1 << 12)
+#       define LC_UPCONFIGURE_DIS                         (1 << 13)
+#       define LC_DYN_LANES_PWR_STATE(x)                  ((x) << 21)
+#       define LC_DYN_LANES_PWR_STATE_MASK                (0x3 << 21)
+#       define LC_DYN_LANES_PWR_STATE_SHIFT               21
+#define PCIE_LC_N_FTS_CNTL                                0x100100a3 /* PCIE */
+#       define LC_XMIT_N_FTS(x)                           ((x) << 0)
+#       define LC_XMIT_N_FTS_MASK                         (0xff << 0)
+#       define LC_XMIT_N_FTS_SHIFT                        0
+#       define LC_XMIT_N_FTS_OVERRIDE_EN                  (1 << 8)
+#       define LC_N_FTS_MASK                              (0xff << 24)
+#define PCIE_LC_SPEED_CNTL                                0x100100A4 /* PCIE */
+#       define LC_GEN2_EN_STRAP                           (1 << 0)
+#       define LC_GEN3_EN_STRAP                           (1 << 1)
+#       define LC_TARGET_LINK_SPEED_OVERRIDE_EN           (1 << 2)
+#       define LC_TARGET_LINK_SPEED_OVERRIDE_MASK         (0x3 << 3)
+#       define LC_TARGET_LINK_SPEED_OVERRIDE_SHIFT        3
+#       define LC_FORCE_EN_SW_SPEED_CHANGE                (1 << 5)
+#       define LC_FORCE_DIS_SW_SPEED_CHANGE               (1 << 6)
+#       define LC_FORCE_EN_HW_SPEED_CHANGE                (1 << 7)
+#       define LC_FORCE_DIS_HW_SPEED_CHANGE               (1 << 8)
+#       define LC_INITIATE_LINK_SPEED_CHANGE              (1 << 9)
+#       define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_MASK      (0x3 << 10)
+#       define LC_SPEED_CHANGE_ATTEMPTS_ALLOWED_SHIFT     10
+#       define LC_CURRENT_DATA_RATE_MASK                  (0x3 << 13) /* 0/1/2 = gen1/2/3 */
+#       define LC_CURRENT_DATA_RATE_SHIFT                 13
+#       define LC_CLR_FAILED_SPD_CHANGE_CNT               (1 << 16)
+#       define LC_OTHER_SIDE_EVER_SENT_GEN2               (1 << 18)
+#       define LC_OTHER_SIDE_SUPPORTS_GEN2                (1 << 19)
+#       define LC_OTHER_SIDE_EVER_SENT_GEN3               (1 << 20)
+#       define LC_OTHER_SIDE_SUPPORTS_GEN3                (1 << 21)
+
+#define PCIE_LC_CNTL2                                     0x100100B1 /* PCIE */
+#       define LC_ALLOW_PDWN_IN_L1                        (1 << 17)
+#       define LC_ALLOW_PDWN_IN_L23                       (1 << 18)
+
+#define PCIE_LC_CNTL3                                     0x100100B5 /* PCIE */
+#       define LC_GO_TO_RECOVERY                          (1 << 30)
+#define PCIE_LC_CNTL4                                     0x100100B6 /* PCIE */
+#       define LC_REDO_EQ                                 (1 << 5)
+#       define LC_SET_QUIESCE                             (1 << 13)
+
+/* direct registers */
 #define PCIE_INDEX  					0x38
 #define PCIE_DATA  					0x3C
 
+#define SMC_IND_INDEX_0  				0x200
+#define SMC_IND_DATA_0  				0x204
+
+#define SMC_IND_ACCESS_CNTL  				0x240
+#define		AUTO_INCREMENT_IND_0			(1 << 0)
+
+#define SMC_MESSAGE_0  					0x250
+#define		SMC_MSG_MASK				0xffff
+#define SMC_RESP_0  					0x254
+#define		SMC_RESP_MASK				0xffff
+
+#define SMC_MSG_ARG_0  					0x290
+
 #define VGA_HDP_CONTROL  				0x328
 #define		VGA_MEMORY_DISABLE				(1 << 4)
 
 #define DMIF_ADDR_CALC  				0xC00
 
+#define	PIPE0_DMIF_BUFFER_CONTROL			  0x0ca0
+#       define DMIF_BUFFERS_ALLOCATED(x)                  ((x) << 0)
+#       define DMIF_BUFFERS_ALLOCATED_COMPLETED           (1 << 4)
+
 #define	SRBM_GFX_CNTL				        0xE44
 #define		PIPEID(x)					((x) << 0)
 #define		MEID(x)						((x) << 2)
@@ -172,6 +526,10 @@
 #define	VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x157C
 #define	VM_CONTEXT1_PAGE_TABLE_END_ADDR			0x1580
 
+#define VM_L2_CG           				0x15c0
+#define		MC_CG_ENABLE				(1 << 18)
+#define		MC_LS_ENABLE				(1 << 19)
+
 #define MC_SHARED_CHMAP						0x2004
 #define		NOOFCHAN_SHIFT					12
 #define		NOOFCHAN_MASK					0x0000f000
@@ -201,6 +559,17 @@
 
 #define MC_SHARED_BLACKOUT_CNTL           		0x20ac
 
+#define MC_HUB_MISC_HUB_CG           			0x20b8
+#define MC_HUB_MISC_VM_CG           			0x20bc
+
+#define MC_HUB_MISC_SIP_CG           			0x20c0
+
+#define MC_XPB_CLK_GAT           			0x2478
+
+#define MC_CITF_MISC_RD_CG           			0x2648
+#define MC_CITF_MISC_WR_CG           			0x264c
+#define MC_CITF_MISC_VM_CG           			0x2650
+
 #define	MC_ARB_RAMCFG					0x2760
 #define		NOOFBANK_SHIFT					0
 #define		NOOFBANK_MASK					0x00000003
@@ -215,9 +584,37 @@
 #define		NOOFGROUPS_SHIFT				12
 #define		NOOFGROUPS_MASK					0x00001000
 
+#define	MC_ARB_DRAM_TIMING				0x2774
+#define	MC_ARB_DRAM_TIMING2				0x2778
+
+#define MC_ARB_BURST_TIME                               0x2808
+#define		STATE0(x)				((x) << 0)
+#define		STATE0_MASK				(0x1f << 0)
+#define		STATE0_SHIFT				0
+#define		STATE1(x)				((x) << 5)
+#define		STATE1_MASK				(0x1f << 5)
+#define		STATE1_SHIFT				5
+#define		STATE2(x)				((x) << 10)
+#define		STATE2_MASK				(0x1f << 10)
+#define		STATE2_SHIFT				10
+#define		STATE3(x)				((x) << 15)
+#define		STATE3_MASK				(0x1f << 15)
+#define		STATE3_SHIFT				15
+
+#define MC_SEQ_RAS_TIMING                               0x28a0
+#define MC_SEQ_CAS_TIMING                               0x28a4
+#define MC_SEQ_MISC_TIMING                              0x28a8
+#define MC_SEQ_MISC_TIMING2                             0x28ac
+#define MC_SEQ_PMG_TIMING                               0x28b0
+#define MC_SEQ_RD_CTL_D0                                0x28b4
+#define MC_SEQ_RD_CTL_D1                                0x28b8
+#define MC_SEQ_WR_CTL_D0                                0x28bc
+#define MC_SEQ_WR_CTL_D1                                0x28c0
+
 #define MC_SEQ_SUP_CNTL           			0x28c8
 #define		RUN_MASK      				(1 << 0)
 #define MC_SEQ_SUP_PGM           			0x28cc
+#define MC_PMG_AUTO_CMD           			0x28d0
 
 #define	MC_SEQ_TRAIN_WAKEUP_CNTL			0x28e8
 #define		TRAIN_DONE_D0      			(1 << 30)
@@ -226,10 +623,92 @@
 #define MC_IO_PAD_CNTL_D0           			0x29d0
 #define		MEM_FALL_OUT_CMD      			(1 << 8)
 
+#define MC_SEQ_MISC0           				0x2a00
+#define 	MC_SEQ_MISC0_VEN_ID_SHIFT               8
+#define 	MC_SEQ_MISC0_VEN_ID_MASK                0x00000f00
+#define 	MC_SEQ_MISC0_VEN_ID_VALUE               3
+#define 	MC_SEQ_MISC0_REV_ID_SHIFT               12
+#define 	MC_SEQ_MISC0_REV_ID_MASK                0x0000f000
+#define 	MC_SEQ_MISC0_REV_ID_VALUE               1
+#define 	MC_SEQ_MISC0_GDDR5_SHIFT                28
+#define 	MC_SEQ_MISC0_GDDR5_MASK                 0xf0000000
+#define 	MC_SEQ_MISC0_GDDR5_VALUE                5
+#define MC_SEQ_MISC1                                    0x2a04
+#define MC_SEQ_RESERVE_M                                0x2a08
+#define MC_PMG_CMD_EMRS                                 0x2a0c
+
 #define MC_SEQ_IO_DEBUG_INDEX           		0x2a44
 #define MC_SEQ_IO_DEBUG_DATA           			0x2a48
 
+#define MC_SEQ_MISC5                                    0x2a54
+#define MC_SEQ_MISC6                                    0x2a58
+
+#define MC_SEQ_MISC7                                    0x2a64
+
+#define MC_SEQ_RAS_TIMING_LP                            0x2a6c
+#define MC_SEQ_CAS_TIMING_LP                            0x2a70
+#define MC_SEQ_MISC_TIMING_LP                           0x2a74
+#define MC_SEQ_MISC_TIMING2_LP                          0x2a78
+#define MC_SEQ_WR_CTL_D0_LP                             0x2a7c
+#define MC_SEQ_WR_CTL_D1_LP                             0x2a80
+#define MC_SEQ_PMG_CMD_EMRS_LP                          0x2a84
+#define MC_SEQ_PMG_CMD_MRS_LP                           0x2a88
+
+#define MC_PMG_CMD_MRS                                  0x2aac
+
+#define MC_SEQ_RD_CTL_D0_LP                             0x2b1c
+#define MC_SEQ_RD_CTL_D1_LP                             0x2b20
+
+#define MC_PMG_CMD_MRS1                                 0x2b44
+#define MC_SEQ_PMG_CMD_MRS1_LP                          0x2b48
+#define MC_SEQ_PMG_TIMING_LP                            0x2b4c
+
+#define MC_SEQ_WR_CTL_2                                 0x2b54
+#define MC_SEQ_WR_CTL_2_LP                              0x2b58
+#define MC_PMG_CMD_MRS2                                 0x2b5c
+#define MC_SEQ_PMG_CMD_MRS2_LP                          0x2b60
+
+#define	MCLK_PWRMGT_CNTL				0x2ba0
+#       define DLL_SPEED(x)				((x) << 0)
+#       define DLL_SPEED_MASK				(0x1f << 0)
+#       define DLL_READY                                (1 << 6)
+#       define MC_INT_CNTL                              (1 << 7)
+#       define MRDCK0_PDNB                              (1 << 8)
+#       define MRDCK1_PDNB                              (1 << 9)
+#       define MRDCK0_RESET                             (1 << 16)
+#       define MRDCK1_RESET                             (1 << 17)
+#       define DLL_READY_READ                           (1 << 24)
+#define	DLL_CNTL					0x2ba4
+#       define MRDCK0_BYPASS                            (1 << 24)
+#       define MRDCK1_BYPASS                            (1 << 25)
+
+#define	MPLL_FUNC_CNTL					0x2bb4
+#define		BWCTRL(x)				((x) << 20)
+#define		BWCTRL_MASK				(0xff << 20)
+#define	MPLL_FUNC_CNTL_1				0x2bb8
+#define		VCO_MODE(x)				((x) << 0)
+#define		VCO_MODE_MASK				(3 << 0)
+#define		CLKFRAC(x)				((x) << 4)
+#define		CLKFRAC_MASK				(0xfff << 4)
+#define		CLKF(x)					((x) << 16)
+#define		CLKF_MASK				(0xfff << 16)
+#define	MPLL_FUNC_CNTL_2				0x2bbc
+#define	MPLL_AD_FUNC_CNTL				0x2bc0
+#define		YCLK_POST_DIV(x)			((x) << 0)
+#define		YCLK_POST_DIV_MASK			(7 << 0)
+#define	MPLL_DQ_FUNC_CNTL				0x2bc4
+#define		YCLK_SEL(x)				((x) << 4)
+#define		YCLK_SEL_MASK				(1 << 4)
+
+#define	MPLL_SS1					0x2bcc
+#define		CLKV(x)					((x) << 0)
+#define		CLKV_MASK				(0x3ffffff << 0)
+#define	MPLL_SS2					0x2bd0
+#define		CLKS(x)					((x) << 0)
+#define		CLKS_MASK				(0xfff << 0)
+
 #define	HDP_HOST_PATH_CNTL				0x2C00
+#define 	CLOCK_GATING_DIS			(1 << 23)
 #define	HDP_NONSURFACE_BASE				0x2C04
 #define	HDP_NONSURFACE_INFO				0x2C08
 #define	HDP_NONSURFACE_SIZE				0x2C0C
@@ -237,6 +716,26 @@
 #define HDP_ADDR_CONFIG  				0x2F48
 #define HDP_MISC_CNTL					0x2F4C
 #define 	HDP_FLUSH_INVALIDATE_CACHE			(1 << 0)
+#define HDP_MEM_POWER_LS				0x2F50
+#define 	HDP_LS_ENABLE				(1 << 0)
+
+#define ATC_MISC_CG           				0x3350
+
+#define MC_SEQ_CNTL_3                                     0x3600
+#       define CAC_EN                                     (1 << 31)
+#define MC_SEQ_G5PDX_CTRL                                 0x3604
+#define MC_SEQ_G5PDX_CTRL_LP                              0x3608
+#define MC_SEQ_G5PDX_CMD0                                 0x360c
+#define MC_SEQ_G5PDX_CMD0_LP                              0x3610
+#define MC_SEQ_G5PDX_CMD1                                 0x3614
+#define MC_SEQ_G5PDX_CMD1_LP                              0x3618
+
+#define MC_SEQ_PMG_DVS_CTL                                0x3628
+#define MC_SEQ_PMG_DVS_CTL_LP                             0x362c
+#define MC_SEQ_PMG_DVS_CMD                                0x3630
+#define MC_SEQ_PMG_DVS_CMD_LP                             0x3634
+#define MC_SEQ_DLL_STBY                                   0x3638
+#define MC_SEQ_DLL_STBY_LP                                0x363c
 
 #define IH_RB_CNTL                                        0x3e00
 #       define IH_RB_ENABLE                               (1 << 0)
@@ -265,6 +764,9 @@
 #       define MC_WR_CLEAN_CNT(x)                         ((x) << 20)
 #       define MC_VMID(x)                                 ((x) << 25)
 
+#define	BIF_LNCNT_RESET					0x5220
+#       define RESET_LNCNT_EN                           (1 << 0)
+
 #define	CONFIG_MEMSIZE					0x5428
 
 #define INTERRUPT_CNTL                                    0x5468
@@ -401,6 +903,9 @@
 #       define DC_HPDx_RX_INT_TIMER(x)                    ((x) << 16)
 #       define DC_HPDx_EN                                 (1 << 28)
 
+#define DPG_PIPE_STUTTER_CONTROL                          0x6cd4
+#       define STUTTER_ENABLE                             (1 << 0)
+
 #define	GRBM_CNTL					0x8000
 #define		GRBM_READ_TIMEOUT(x)				((x) << 0)
 
@@ -504,6 +1009,9 @@
 
 #define	CP_RB0_RPTR					0x8700
 #define	CP_RB_WPTR_DELAY				0x8704
+#define	CP_RB_WPTR_POLL_CNTL				0x8708
+#define		IDLE_POLL_COUNT(x)			((x) << 16)
+#define		IDLE_POLL_COUNT_MASK			(0xffff << 16)
 
 #define CP_MEQ_THRESHOLDS				0x8764
 #define		MEQ1_START(x)				((x) << 0)
@@ -730,6 +1238,9 @@
 #       define CP_RINGID1_INT_STAT                      (1 << 30)
 #       define CP_RINGID0_INT_STAT                      (1 << 31)
 
+#define CP_MEM_SLP_CNTL                                 0xC1E4
+#       define CP_MEM_LS_EN                             (1 << 0)
+
 #define CP_CPF_DEBUG                                    0xC200
 
 #define CP_PQ_WPTR_POLL_CNTL                            0xC20C
@@ -775,14 +1286,20 @@
 
 #define RLC_MC_CNTL                                       0xC30C
 
+#define RLC_MEM_SLP_CNTL                                  0xC318
+#       define RLC_MEM_LS_EN                              (1 << 0)
+
 #define RLC_LB_CNTR_MAX                                   0xC348
 
 #define RLC_LB_CNTL                                       0xC364
+#       define LOAD_BALANCE_ENABLE                        (1 << 0)
 
 #define RLC_LB_CNTR_INIT                                  0xC36C
 
 #define RLC_SAVE_AND_RESTORE_BASE                         0xC374
-#define RLC_DRIVER_DMA_STATUS                             0xC378
+#define RLC_DRIVER_DMA_STATUS                             0xC378 /* dGPU */
+#define RLC_CP_TABLE_RESTORE                              0xC378 /* APU */
+#define RLC_PG_DELAY_2                                    0xC37C
 
 #define RLC_GPM_UCODE_ADDR                                0xC388
 #define RLC_GPM_UCODE_DATA                                0xC38C
@@ -791,12 +1308,52 @@
 #define RLC_CAPTURE_GPU_CLOCK_COUNT                       0xC398
 #define RLC_UCODE_CNTL                                    0xC39C
 
+#define RLC_GPM_STAT                                      0xC400
+#       define RLC_GPM_BUSY                               (1 << 0)
+#       define GFX_POWER_STATUS                           (1 << 1)
+#       define GFX_CLOCK_STATUS                           (1 << 2)
+
+#define RLC_PG_CNTL                                       0xC40C
+#       define GFX_PG_ENABLE                              (1 << 0)
+#       define GFX_PG_SRC                                 (1 << 1)
+#       define DYN_PER_CU_PG_ENABLE                       (1 << 2)
+#       define STATIC_PER_CU_PG_ENABLE                    (1 << 3)
+#       define DISABLE_GDS_PG                             (1 << 13)
+#       define DISABLE_CP_PG                              (1 << 15)
+#       define SMU_CLK_SLOWDOWN_ON_PU_ENABLE              (1 << 17)
+#       define SMU_CLK_SLOWDOWN_ON_PD_ENABLE              (1 << 18)
+
+#define RLC_CGTT_MGCG_OVERRIDE                            0xC420
 #define RLC_CGCG_CGLS_CTRL                                0xC424
+#       define CGCG_EN                                    (1 << 0)
+#       define CGLS_EN                                    (1 << 1)
+
+#define RLC_PG_DELAY                                      0xC434
 
 #define RLC_LB_INIT_CU_MASK                               0xC43C
 
 #define RLC_LB_PARAMS                                     0xC444
 
+#define RLC_PG_AO_CU_MASK                                 0xC44C
+
+#define	RLC_MAX_PG_CU					0xC450
+#	define MAX_PU_CU(x)				((x) << 0)
+#	define MAX_PU_CU_MASK				(0xff << 0)
+#define RLC_AUTO_PG_CTRL                                  0xC454
+#       define AUTO_PG_EN                                 (1 << 0)
+#	define GRBM_REG_SGIT(x)				((x) << 3)
+#	define GRBM_REG_SGIT_MASK			(0xffff << 3)
+
+#define RLC_SERDES_WR_CU_MASTER_MASK                      0xC474
+#define RLC_SERDES_WR_NONCU_MASTER_MASK                   0xC478
+#define RLC_SERDES_WR_CTRL                                0xC47C
+#define		BPM_ADDR(x)				((x) << 0)
+#define		BPM_ADDR_MASK      			(0xff << 0)
+#define		CGLS_ENABLE				(1 << 16)
+#define		CGCG_OVERRIDE_0				(1 << 20)
+#define		MGCG_OVERRIDE_0				(1 << 22)
+#define		MGCG_OVERRIDE_1				(1 << 23)
+
 #define RLC_SERDES_CU_MASTER_BUSY                         0xC484
 #define RLC_SERDES_NONCU_MASTER_BUSY                      0xC488
 #       define SE_MASTER_BUSY_MASK                        0x0000ffff
@@ -807,6 +1364,13 @@
 #define RLC_GPM_SCRATCH_ADDR                              0xC4B0
 #define RLC_GPM_SCRATCH_DATA                              0xC4B4
 
+#define RLC_GPR_REG2                                      0xC4E8
+#define		REQ      				0x00000001
+#define		MESSAGE(x)      			((x) << 1)
+#define		MESSAGE_MASK      			0x0000001e
+#define		MSG_ENTER_RLC_SAFE_MODE      			1
+#define		MSG_EXIT_RLC_SAFE_MODE      			0
+
 #define CP_HPD_EOP_BASE_ADDR                              0xC904
 #define CP_HPD_EOP_BASE_ADDR_HI                           0xC908
 #define CP_HPD_EOP_VMID                                   0xC90C
@@ -851,6 +1415,8 @@
 #define		MQD_VMID(x)				((x) << 0)
 #define		MQD_VMID_MASK      			(0xf << 0)
 
+#define DB_RENDER_CONTROL                               0x28000
+
 #define PA_SC_RASTER_CONFIG                             0x28350
 #       define RASTER_CONFIG_RB_MAP_0                   0
 #       define RASTER_CONFIG_RB_MAP_1                   1
@@ -944,6 +1510,16 @@
 
 #define	CP_PERFMON_CNTL					0x36020
 
+#define	CGTS_SM_CTRL_REG				0x3c000
+#define		SM_MODE(x)				((x) << 17)
+#define		SM_MODE_MASK				(0x7 << 17)
+#define		SM_MODE_ENABLE				(1 << 20)
+#define		CGTS_OVERRIDE				(1 << 21)
+#define		CGTS_LS_OVERRIDE			(1 << 22)
+#define		ON_MONITOR_ADD_EN			(1 << 23)
+#define		ON_MONITOR_ADD(x)			((x) << 24)
+#define		ON_MONITOR_ADD_MASK			(0xff << 24)
+
 #define	CGTS_TCC_DISABLE				0x3c00c
 #define	CGTS_USER_TCC_DISABLE				0x3c010
 #define		TCC_DISABLE_MASK				0xFFFF0000
@@ -1176,6 +1752,8 @@
 
 #define	SDMA0_UCODE_ADDR                                  0xD000
 #define	SDMA0_UCODE_DATA                                  0xD004
+#define	SDMA0_POWER_CNTL                                  0xD008
+#define	SDMA0_CLK_CTRL                                    0xD00C
 
 #define SDMA0_CNTL                                        0xD010
 #       define TRAP_ENABLE                                (1 << 0)
@@ -1300,6 +1878,13 @@
 #define UVD_RBC_RB_RPTR			0xf690
 #define UVD_RBC_RB_WPTR			0xf694
 
+#define	UVD_CGC_CTRL					0xF4B0
+#	define DCM					(1 << 0)
+#	define CG_DT(x)					((x) << 2)
+#	define CG_DT_MASK				(0xf << 2)
+#	define CLK_OD(x)				((x) << 6)
+#	define CLK_OD_MASK				(0x1f << 6)
+
 /* UVD clocks */
 
 #define CG_DCLK_CNTL			0xC050009C
@@ -1310,4 +1895,7 @@
 #define CG_VCLK_CNTL			0xC05000A4
 #define CG_VCLK_STATUS			0xC05000A8
 
+/* UVD CTX indirect */
+#define	UVD_CGC_MEM_CTRL				0xC0
+
 #endif
diff --git a/drivers/gpu/drm/radeon/clearstate_cayman.h b/drivers/gpu/drm/radeon/clearstate_cayman.h
index c003394..aa908c5 100644
--- a/drivers/gpu/drm/radeon/clearstate_cayman.h
+++ b/drivers/gpu/drm/radeon/clearstate_cayman.h
@@ -1073,7 +1073,7 @@
     {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 },
     { 0, 0, 0 }
 };
-struct cs_section_def cayman_cs_data[] = {
+static const struct cs_section_def cayman_cs_data[] = {
     { SECT_CONTEXT_defs, SECT_CONTEXT },
     { SECT_CLEAR_defs, SECT_CLEAR },
     { SECT_CTRLCONST_defs, SECT_CTRLCONST },
diff --git a/drivers/gpu/drm/radeon/clearstate_ci.h b/drivers/gpu/drm/radeon/clearstate_ci.h
new file mode 100644
index 0000000..c3982f9
--- /dev/null
+++ b/drivers/gpu/drm/radeon/clearstate_ci.h
@@ -0,0 +1,944 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+static const unsigned int ci_SECT_CONTEXT_def_1[] =
+{
+    0x00000000, // DB_RENDER_CONTROL
+    0x00000000, // DB_COUNT_CONTROL
+    0x00000000, // DB_DEPTH_VIEW
+    0x00000000, // DB_RENDER_OVERRIDE
+    0x00000000, // DB_RENDER_OVERRIDE2
+    0x00000000, // DB_HTILE_DATA_BASE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // DB_DEPTH_BOUNDS_MIN
+    0x00000000, // DB_DEPTH_BOUNDS_MAX
+    0x00000000, // DB_STENCIL_CLEAR
+    0x00000000, // DB_DEPTH_CLEAR
+    0x00000000, // PA_SC_SCREEN_SCISSOR_TL
+    0x40004000, // PA_SC_SCREEN_SCISSOR_BR
+    0, // HOLE
+    0x00000000, // DB_DEPTH_INFO
+    0x00000000, // DB_Z_INFO
+    0x00000000, // DB_STENCIL_INFO
+    0x00000000, // DB_Z_READ_BASE
+    0x00000000, // DB_STENCIL_READ_BASE
+    0x00000000, // DB_Z_WRITE_BASE
+    0x00000000, // DB_STENCIL_WRITE_BASE
+    0x00000000, // DB_DEPTH_SIZE
+    0x00000000, // DB_DEPTH_SLICE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // TA_BC_BASE_ADDR
+    0x00000000, // TA_BC_BASE_ADDR_HI
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // COHER_DEST_BASE_HI_0
+    0x00000000, // COHER_DEST_BASE_HI_1
+    0x00000000, // COHER_DEST_BASE_HI_2
+    0x00000000, // COHER_DEST_BASE_HI_3
+    0x00000000, // COHER_DEST_BASE_2
+    0x00000000, // COHER_DEST_BASE_3
+    0x00000000, // PA_SC_WINDOW_OFFSET
+    0x80000000, // PA_SC_WINDOW_SCISSOR_TL
+    0x40004000, // PA_SC_WINDOW_SCISSOR_BR
+    0x0000ffff, // PA_SC_CLIPRECT_RULE
+    0x00000000, // PA_SC_CLIPRECT_0_TL
+    0x40004000, // PA_SC_CLIPRECT_0_BR
+    0x00000000, // PA_SC_CLIPRECT_1_TL
+    0x40004000, // PA_SC_CLIPRECT_1_BR
+    0x00000000, // PA_SC_CLIPRECT_2_TL
+    0x40004000, // PA_SC_CLIPRECT_2_BR
+    0x00000000, // PA_SC_CLIPRECT_3_TL
+    0x40004000, // PA_SC_CLIPRECT_3_BR
+    0xaa99aaaa, // PA_SC_EDGERULE
+    0x00000000, // PA_SU_HARDWARE_SCREEN_OFFSET
+    0xffffffff, // CB_TARGET_MASK
+    0xffffffff, // CB_SHADER_MASK
+    0x80000000, // PA_SC_GENERIC_SCISSOR_TL
+    0x40004000, // PA_SC_GENERIC_SCISSOR_BR
+    0x00000000, // COHER_DEST_BASE_0
+    0x00000000, // COHER_DEST_BASE_1
+    0x80000000, // PA_SC_VPORT_SCISSOR_0_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_0_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_1_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_1_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_2_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_2_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_3_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_3_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_4_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_4_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_5_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_5_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_6_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_6_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_7_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_7_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_8_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_8_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_9_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_9_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_10_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_10_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_11_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_11_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_12_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_12_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_13_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_13_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_14_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_14_BR
+    0x80000000, // PA_SC_VPORT_SCISSOR_15_TL
+    0x40004000, // PA_SC_VPORT_SCISSOR_15_BR
+    0x00000000, // PA_SC_VPORT_ZMIN_0
+    0x3f800000, // PA_SC_VPORT_ZMAX_0
+    0x00000000, // PA_SC_VPORT_ZMIN_1
+    0x3f800000, // PA_SC_VPORT_ZMAX_1
+    0x00000000, // PA_SC_VPORT_ZMIN_2
+    0x3f800000, // PA_SC_VPORT_ZMAX_2
+    0x00000000, // PA_SC_VPORT_ZMIN_3
+    0x3f800000, // PA_SC_VPORT_ZMAX_3
+    0x00000000, // PA_SC_VPORT_ZMIN_4
+    0x3f800000, // PA_SC_VPORT_ZMAX_4
+    0x00000000, // PA_SC_VPORT_ZMIN_5
+    0x3f800000, // PA_SC_VPORT_ZMAX_5
+    0x00000000, // PA_SC_VPORT_ZMIN_6
+    0x3f800000, // PA_SC_VPORT_ZMAX_6
+    0x00000000, // PA_SC_VPORT_ZMIN_7
+    0x3f800000, // PA_SC_VPORT_ZMAX_7
+    0x00000000, // PA_SC_VPORT_ZMIN_8
+    0x3f800000, // PA_SC_VPORT_ZMAX_8
+    0x00000000, // PA_SC_VPORT_ZMIN_9
+    0x3f800000, // PA_SC_VPORT_ZMAX_9
+    0x00000000, // PA_SC_VPORT_ZMIN_10
+    0x3f800000, // PA_SC_VPORT_ZMAX_10
+    0x00000000, // PA_SC_VPORT_ZMIN_11
+    0x3f800000, // PA_SC_VPORT_ZMAX_11
+    0x00000000, // PA_SC_VPORT_ZMIN_12
+    0x3f800000, // PA_SC_VPORT_ZMAX_12
+    0x00000000, // PA_SC_VPORT_ZMIN_13
+    0x3f800000, // PA_SC_VPORT_ZMAX_13
+    0x00000000, // PA_SC_VPORT_ZMIN_14
+    0x3f800000, // PA_SC_VPORT_ZMAX_14
+    0x00000000, // PA_SC_VPORT_ZMIN_15
+    0x3f800000, // PA_SC_VPORT_ZMAX_15
+};
+static const unsigned int ci_SECT_CONTEXT_def_2[] =
+{
+    0x00000000, // PA_SC_SCREEN_EXTENT_CONTROL
+    0, // HOLE
+    0x00000000, // CP_PERFMON_CNTX_CNTL
+    0x00000000, // CP_RINGID
+    0x00000000, // CP_VMID
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0xffffffff, // VGT_MAX_VTX_INDX
+    0x00000000, // VGT_MIN_VTX_INDX
+    0x00000000, // VGT_INDX_OFFSET
+    0x00000000, // VGT_MULTI_PRIM_IB_RESET_INDX
+    0, // HOLE
+    0x00000000, // CB_BLEND_RED
+    0x00000000, // CB_BLEND_GREEN
+    0x00000000, // CB_BLEND_BLUE
+    0x00000000, // CB_BLEND_ALPHA
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // DB_STENCIL_CONTROL
+    0x00000000, // DB_STENCILREFMASK
+    0x00000000, // DB_STENCILREFMASK_BF
+    0, // HOLE
+    0x00000000, // PA_CL_VPORT_XSCALE
+    0x00000000, // PA_CL_VPORT_XOFFSET
+    0x00000000, // PA_CL_VPORT_YSCALE
+    0x00000000, // PA_CL_VPORT_YOFFSET
+    0x00000000, // PA_CL_VPORT_ZSCALE
+    0x00000000, // PA_CL_VPORT_ZOFFSET
+    0x00000000, // PA_CL_VPORT_XSCALE_1
+    0x00000000, // PA_CL_VPORT_XOFFSET_1
+    0x00000000, // PA_CL_VPORT_YSCALE_1
+    0x00000000, // PA_CL_VPORT_YOFFSET_1
+    0x00000000, // PA_CL_VPORT_ZSCALE_1
+    0x00000000, // PA_CL_VPORT_ZOFFSET_1
+    0x00000000, // PA_CL_VPORT_XSCALE_2
+    0x00000000, // PA_CL_VPORT_XOFFSET_2
+    0x00000000, // PA_CL_VPORT_YSCALE_2
+    0x00000000, // PA_CL_VPORT_YOFFSET_2
+    0x00000000, // PA_CL_VPORT_ZSCALE_2
+    0x00000000, // PA_CL_VPORT_ZOFFSET_2
+    0x00000000, // PA_CL_VPORT_XSCALE_3
+    0x00000000, // PA_CL_VPORT_XOFFSET_3
+    0x00000000, // PA_CL_VPORT_YSCALE_3
+    0x00000000, // PA_CL_VPORT_YOFFSET_3
+    0x00000000, // PA_CL_VPORT_ZSCALE_3
+    0x00000000, // PA_CL_VPORT_ZOFFSET_3
+    0x00000000, // PA_CL_VPORT_XSCALE_4
+    0x00000000, // PA_CL_VPORT_XOFFSET_4
+    0x00000000, // PA_CL_VPORT_YSCALE_4
+    0x00000000, // PA_CL_VPORT_YOFFSET_4
+    0x00000000, // PA_CL_VPORT_ZSCALE_4
+    0x00000000, // PA_CL_VPORT_ZOFFSET_4
+    0x00000000, // PA_CL_VPORT_XSCALE_5
+    0x00000000, // PA_CL_VPORT_XOFFSET_5
+    0x00000000, // PA_CL_VPORT_YSCALE_5
+    0x00000000, // PA_CL_VPORT_YOFFSET_5
+    0x00000000, // PA_CL_VPORT_ZSCALE_5
+    0x00000000, // PA_CL_VPORT_ZOFFSET_5
+    0x00000000, // PA_CL_VPORT_XSCALE_6
+    0x00000000, // PA_CL_VPORT_XOFFSET_6
+    0x00000000, // PA_CL_VPORT_YSCALE_6
+    0x00000000, // PA_CL_VPORT_YOFFSET_6
+    0x00000000, // PA_CL_VPORT_ZSCALE_6
+    0x00000000, // PA_CL_VPORT_ZOFFSET_6
+    0x00000000, // PA_CL_VPORT_XSCALE_7
+    0x00000000, // PA_CL_VPORT_XOFFSET_7
+    0x00000000, // PA_CL_VPORT_YSCALE_7
+    0x00000000, // PA_CL_VPORT_YOFFSET_7
+    0x00000000, // PA_CL_VPORT_ZSCALE_7
+    0x00000000, // PA_CL_VPORT_ZOFFSET_7
+    0x00000000, // PA_CL_VPORT_XSCALE_8
+    0x00000000, // PA_CL_VPORT_XOFFSET_8
+    0x00000000, // PA_CL_VPORT_YSCALE_8
+    0x00000000, // PA_CL_VPORT_YOFFSET_8
+    0x00000000, // PA_CL_VPORT_ZSCALE_8
+    0x00000000, // PA_CL_VPORT_ZOFFSET_8
+    0x00000000, // PA_CL_VPORT_XSCALE_9
+    0x00000000, // PA_CL_VPORT_XOFFSET_9
+    0x00000000, // PA_CL_VPORT_YSCALE_9
+    0x00000000, // PA_CL_VPORT_YOFFSET_9
+    0x00000000, // PA_CL_VPORT_ZSCALE_9
+    0x00000000, // PA_CL_VPORT_ZOFFSET_9
+    0x00000000, // PA_CL_VPORT_XSCALE_10
+    0x00000000, // PA_CL_VPORT_XOFFSET_10
+    0x00000000, // PA_CL_VPORT_YSCALE_10
+    0x00000000, // PA_CL_VPORT_YOFFSET_10
+    0x00000000, // PA_CL_VPORT_ZSCALE_10
+    0x00000000, // PA_CL_VPORT_ZOFFSET_10
+    0x00000000, // PA_CL_VPORT_XSCALE_11
+    0x00000000, // PA_CL_VPORT_XOFFSET_11
+    0x00000000, // PA_CL_VPORT_YSCALE_11
+    0x00000000, // PA_CL_VPORT_YOFFSET_11
+    0x00000000, // PA_CL_VPORT_ZSCALE_11
+    0x00000000, // PA_CL_VPORT_ZOFFSET_11
+    0x00000000, // PA_CL_VPORT_XSCALE_12
+    0x00000000, // PA_CL_VPORT_XOFFSET_12
+    0x00000000, // PA_CL_VPORT_YSCALE_12
+    0x00000000, // PA_CL_VPORT_YOFFSET_12
+    0x00000000, // PA_CL_VPORT_ZSCALE_12
+    0x00000000, // PA_CL_VPORT_ZOFFSET_12
+    0x00000000, // PA_CL_VPORT_XSCALE_13
+    0x00000000, // PA_CL_VPORT_XOFFSET_13
+    0x00000000, // PA_CL_VPORT_YSCALE_13
+    0x00000000, // PA_CL_VPORT_YOFFSET_13
+    0x00000000, // PA_CL_VPORT_ZSCALE_13
+    0x00000000, // PA_CL_VPORT_ZOFFSET_13
+    0x00000000, // PA_CL_VPORT_XSCALE_14
+    0x00000000, // PA_CL_VPORT_XOFFSET_14
+    0x00000000, // PA_CL_VPORT_YSCALE_14
+    0x00000000, // PA_CL_VPORT_YOFFSET_14
+    0x00000000, // PA_CL_VPORT_ZSCALE_14
+    0x00000000, // PA_CL_VPORT_ZOFFSET_14
+    0x00000000, // PA_CL_VPORT_XSCALE_15
+    0x00000000, // PA_CL_VPORT_XOFFSET_15
+    0x00000000, // PA_CL_VPORT_YSCALE_15
+    0x00000000, // PA_CL_VPORT_YOFFSET_15
+    0x00000000, // PA_CL_VPORT_ZSCALE_15
+    0x00000000, // PA_CL_VPORT_ZOFFSET_15
+    0x00000000, // PA_CL_UCP_0_X
+    0x00000000, // PA_CL_UCP_0_Y
+    0x00000000, // PA_CL_UCP_0_Z
+    0x00000000, // PA_CL_UCP_0_W
+    0x00000000, // PA_CL_UCP_1_X
+    0x00000000, // PA_CL_UCP_1_Y
+    0x00000000, // PA_CL_UCP_1_Z
+    0x00000000, // PA_CL_UCP_1_W
+    0x00000000, // PA_CL_UCP_2_X
+    0x00000000, // PA_CL_UCP_2_Y
+    0x00000000, // PA_CL_UCP_2_Z
+    0x00000000, // PA_CL_UCP_2_W
+    0x00000000, // PA_CL_UCP_3_X
+    0x00000000, // PA_CL_UCP_3_Y
+    0x00000000, // PA_CL_UCP_3_Z
+    0x00000000, // PA_CL_UCP_3_W
+    0x00000000, // PA_CL_UCP_4_X
+    0x00000000, // PA_CL_UCP_4_Y
+    0x00000000, // PA_CL_UCP_4_Z
+    0x00000000, // PA_CL_UCP_4_W
+    0x00000000, // PA_CL_UCP_5_X
+    0x00000000, // PA_CL_UCP_5_Y
+    0x00000000, // PA_CL_UCP_5_Z
+    0x00000000, // PA_CL_UCP_5_W
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // SPI_PS_INPUT_CNTL_0
+    0x00000000, // SPI_PS_INPUT_CNTL_1
+    0x00000000, // SPI_PS_INPUT_CNTL_2
+    0x00000000, // SPI_PS_INPUT_CNTL_3
+    0x00000000, // SPI_PS_INPUT_CNTL_4
+    0x00000000, // SPI_PS_INPUT_CNTL_5
+    0x00000000, // SPI_PS_INPUT_CNTL_6
+    0x00000000, // SPI_PS_INPUT_CNTL_7
+    0x00000000, // SPI_PS_INPUT_CNTL_8
+    0x00000000, // SPI_PS_INPUT_CNTL_9
+    0x00000000, // SPI_PS_INPUT_CNTL_10
+    0x00000000, // SPI_PS_INPUT_CNTL_11
+    0x00000000, // SPI_PS_INPUT_CNTL_12
+    0x00000000, // SPI_PS_INPUT_CNTL_13
+    0x00000000, // SPI_PS_INPUT_CNTL_14
+    0x00000000, // SPI_PS_INPUT_CNTL_15
+    0x00000000, // SPI_PS_INPUT_CNTL_16
+    0x00000000, // SPI_PS_INPUT_CNTL_17
+    0x00000000, // SPI_PS_INPUT_CNTL_18
+    0x00000000, // SPI_PS_INPUT_CNTL_19
+    0x00000000, // SPI_PS_INPUT_CNTL_20
+    0x00000000, // SPI_PS_INPUT_CNTL_21
+    0x00000000, // SPI_PS_INPUT_CNTL_22
+    0x00000000, // SPI_PS_INPUT_CNTL_23
+    0x00000000, // SPI_PS_INPUT_CNTL_24
+    0x00000000, // SPI_PS_INPUT_CNTL_25
+    0x00000000, // SPI_PS_INPUT_CNTL_26
+    0x00000000, // SPI_PS_INPUT_CNTL_27
+    0x00000000, // SPI_PS_INPUT_CNTL_28
+    0x00000000, // SPI_PS_INPUT_CNTL_29
+    0x00000000, // SPI_PS_INPUT_CNTL_30
+    0x00000000, // SPI_PS_INPUT_CNTL_31
+    0x00000000, // SPI_VS_OUT_CONFIG
+    0, // HOLE
+    0x00000000, // SPI_PS_INPUT_ENA
+    0x00000000, // SPI_PS_INPUT_ADDR
+    0x00000000, // SPI_INTERP_CONTROL_0
+    0x00000002, // SPI_PS_IN_CONTROL
+    0, // HOLE
+    0x00000000, // SPI_BARYC_CNTL
+    0, // HOLE
+    0x00000000, // SPI_TMPRING_SIZE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // SPI_SHADER_POS_FORMAT
+    0x00000000, // SPI_SHADER_Z_FORMAT
+    0x00000000, // SPI_SHADER_COL_FORMAT
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // CB_BLEND0_CONTROL
+    0x00000000, // CB_BLEND1_CONTROL
+    0x00000000, // CB_BLEND2_CONTROL
+    0x00000000, // CB_BLEND3_CONTROL
+    0x00000000, // CB_BLEND4_CONTROL
+    0x00000000, // CB_BLEND5_CONTROL
+    0x00000000, // CB_BLEND6_CONTROL
+    0x00000000, // CB_BLEND7_CONTROL
+};
+static const unsigned int ci_SECT_CONTEXT_def_3[] =
+{
+    0x00000000, // PA_CL_POINT_X_RAD
+    0x00000000, // PA_CL_POINT_Y_RAD
+    0x00000000, // PA_CL_POINT_SIZE
+    0x00000000, // PA_CL_POINT_CULL_RAD
+    0x00000000, // VGT_DMA_BASE_HI
+    0x00000000, // VGT_DMA_BASE
+};
+static const unsigned int ci_SECT_CONTEXT_def_4[] =
+{
+    0x00000000, // DB_DEPTH_CONTROL
+    0x00000000, // DB_EQAA
+    0x00000000, // CB_COLOR_CONTROL
+    0x00000000, // DB_SHADER_CONTROL
+    0x00090000, // PA_CL_CLIP_CNTL
+    0x00000004, // PA_SU_SC_MODE_CNTL
+    0x00000000, // PA_CL_VTE_CNTL
+    0x00000000, // PA_CL_VS_OUT_CNTL
+    0x00000000, // PA_CL_NANINF_CNTL
+    0x00000000, // PA_SU_LINE_STIPPLE_CNTL
+    0x00000000, // PA_SU_LINE_STIPPLE_SCALE
+    0x00000000, // PA_SU_PRIM_FILTER_CNTL
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // PA_SU_POINT_SIZE
+    0x00000000, // PA_SU_POINT_MINMAX
+    0x00000000, // PA_SU_LINE_CNTL
+    0x00000000, // PA_SC_LINE_STIPPLE
+    0x00000000, // VGT_OUTPUT_PATH_CNTL
+    0x00000000, // VGT_HOS_CNTL
+    0x00000000, // VGT_HOS_MAX_TESS_LEVEL
+    0x00000000, // VGT_HOS_MIN_TESS_LEVEL
+    0x00000000, // VGT_HOS_REUSE_DEPTH
+    0x00000000, // VGT_GROUP_PRIM_TYPE
+    0x00000000, // VGT_GROUP_FIRST_DECR
+    0x00000000, // VGT_GROUP_DECR
+    0x00000000, // VGT_GROUP_VECT_0_CNTL
+    0x00000000, // VGT_GROUP_VECT_1_CNTL
+    0x00000000, // VGT_GROUP_VECT_0_FMT_CNTL
+    0x00000000, // VGT_GROUP_VECT_1_FMT_CNTL
+    0x00000000, // VGT_GS_MODE
+    0x00000000, // VGT_GS_ONCHIP_CNTL
+    0x00000000, // PA_SC_MODE_CNTL_0
+    0x00000000, // PA_SC_MODE_CNTL_1
+    0x00000000, // VGT_ENHANCE
+    0x00000100, // VGT_GS_PER_ES
+    0x00000080, // VGT_ES_PER_GS
+    0x00000002, // VGT_GS_PER_VS
+    0x00000000, // VGT_GSVS_RING_OFFSET_1
+    0x00000000, // VGT_GSVS_RING_OFFSET_2
+    0x00000000, // VGT_GSVS_RING_OFFSET_3
+    0x00000000, // VGT_GS_OUT_PRIM_TYPE
+    0x00000000, // IA_ENHANCE
+};
+static const unsigned int ci_SECT_CONTEXT_def_5[] =
+{
+    0x00000000, // WD_ENHANCE
+    0x00000000, // VGT_PRIMITIVEID_EN
+};
+static const unsigned int ci_SECT_CONTEXT_def_6[] =
+{
+    0x00000000, // VGT_PRIMITIVEID_RESET
+};
+static const unsigned int ci_SECT_CONTEXT_def_7[] =
+{
+    0x00000000, // VGT_MULTI_PRIM_IB_RESET_EN
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // VGT_INSTANCE_STEP_RATE_0
+    0x00000000, // VGT_INSTANCE_STEP_RATE_1
+    0x000000ff, // IA_MULTI_VGT_PARAM
+    0x00000000, // VGT_ESGS_RING_ITEMSIZE
+    0x00000000, // VGT_GSVS_RING_ITEMSIZE
+    0x00000000, // VGT_REUSE_OFF
+    0x00000000, // VGT_VTX_CNT_EN
+    0x00000000, // DB_HTILE_SURFACE
+    0x00000000, // DB_SRESULTS_COMPARE_STATE0
+    0x00000000, // DB_SRESULTS_COMPARE_STATE1
+    0x00000000, // DB_PRELOAD_CONTROL
+    0, // HOLE
+    0x00000000, // VGT_STRMOUT_BUFFER_SIZE_0
+    0x00000000, // VGT_STRMOUT_VTX_STRIDE_0
+    0, // HOLE
+    0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_0
+    0x00000000, // VGT_STRMOUT_BUFFER_SIZE_1
+    0x00000000, // VGT_STRMOUT_VTX_STRIDE_1
+    0, // HOLE
+    0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_1
+    0x00000000, // VGT_STRMOUT_BUFFER_SIZE_2
+    0x00000000, // VGT_STRMOUT_VTX_STRIDE_2
+    0, // HOLE
+    0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_2
+    0x00000000, // VGT_STRMOUT_BUFFER_SIZE_3
+    0x00000000, // VGT_STRMOUT_VTX_STRIDE_3
+    0, // HOLE
+    0x00000000, // VGT_STRMOUT_BUFFER_OFFSET_3
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_OFFSET
+    0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
+    0x00000000, // VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE
+    0, // HOLE
+    0x00000000, // VGT_GS_MAX_VERT_OUT
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // VGT_SHADER_STAGES_EN
+    0x00000000, // VGT_LS_HS_CONFIG
+    0x00000000, // VGT_GS_VERT_ITEMSIZE
+    0x00000000, // VGT_GS_VERT_ITEMSIZE_1
+    0x00000000, // VGT_GS_VERT_ITEMSIZE_2
+    0x00000000, // VGT_GS_VERT_ITEMSIZE_3
+    0x00000000, // VGT_TF_PARAM
+    0x00000000, // DB_ALPHA_TO_MASK
+    0, // HOLE
+    0x00000000, // PA_SU_POLY_OFFSET_DB_FMT_CNTL
+    0x00000000, // PA_SU_POLY_OFFSET_CLAMP
+    0x00000000, // PA_SU_POLY_OFFSET_FRONT_SCALE
+    0x00000000, // PA_SU_POLY_OFFSET_FRONT_OFFSET
+    0x00000000, // PA_SU_POLY_OFFSET_BACK_SCALE
+    0x00000000, // PA_SU_POLY_OFFSET_BACK_OFFSET
+    0x00000000, // VGT_GS_INSTANCE_CNT
+    0x00000000, // VGT_STRMOUT_CONFIG
+    0x00000000, // VGT_STRMOUT_BUFFER_CONFIG
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // PA_SC_CENTROID_PRIORITY_0
+    0x00000000, // PA_SC_CENTROID_PRIORITY_1
+    0x00001000, // PA_SC_LINE_CNTL
+    0x00000000, // PA_SC_AA_CONFIG
+    0x00000005, // PA_SU_VTX_CNTL
+    0x3f800000, // PA_CL_GB_VERT_CLIP_ADJ
+    0x3f800000, // PA_CL_GB_VERT_DISC_ADJ
+    0x3f800000, // PA_CL_GB_HORZ_CLIP_ADJ
+    0x3f800000, // PA_CL_GB_HORZ_DISC_ADJ
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
+    0x00000000, // PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
+    0xffffffff, // PA_SC_AA_MASK_X0Y0_X1Y0
+    0xffffffff, // PA_SC_AA_MASK_X0Y1_X1Y1
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0, // HOLE
+    0x0000000e, // VGT_VERTEX_REUSE_BLOCK_CNTL
+    0x00000010, // VGT_OUT_DEALLOC_CNTL
+    0x00000000, // CB_COLOR0_BASE
+    0x00000000, // CB_COLOR0_PITCH
+    0x00000000, // CB_COLOR0_SLICE
+    0x00000000, // CB_COLOR0_VIEW
+    0x00000000, // CB_COLOR0_INFO
+    0x00000000, // CB_COLOR0_ATTRIB
+    0, // HOLE
+    0x00000000, // CB_COLOR0_CMASK
+    0x00000000, // CB_COLOR0_CMASK_SLICE
+    0x00000000, // CB_COLOR0_FMASK
+    0x00000000, // CB_COLOR0_FMASK_SLICE
+    0x00000000, // CB_COLOR0_CLEAR_WORD0
+    0x00000000, // CB_COLOR0_CLEAR_WORD1
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // CB_COLOR1_BASE
+    0x00000000, // CB_COLOR1_PITCH
+    0x00000000, // CB_COLOR1_SLICE
+    0x00000000, // CB_COLOR1_VIEW
+    0x00000000, // CB_COLOR1_INFO
+    0x00000000, // CB_COLOR1_ATTRIB
+    0, // HOLE
+    0x00000000, // CB_COLOR1_CMASK
+    0x00000000, // CB_COLOR1_CMASK_SLICE
+    0x00000000, // CB_COLOR1_FMASK
+    0x00000000, // CB_COLOR1_FMASK_SLICE
+    0x00000000, // CB_COLOR1_CLEAR_WORD0
+    0x00000000, // CB_COLOR1_CLEAR_WORD1
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // CB_COLOR2_BASE
+    0x00000000, // CB_COLOR2_PITCH
+    0x00000000, // CB_COLOR2_SLICE
+    0x00000000, // CB_COLOR2_VIEW
+    0x00000000, // CB_COLOR2_INFO
+    0x00000000, // CB_COLOR2_ATTRIB
+    0, // HOLE
+    0x00000000, // CB_COLOR2_CMASK
+    0x00000000, // CB_COLOR2_CMASK_SLICE
+    0x00000000, // CB_COLOR2_FMASK
+    0x00000000, // CB_COLOR2_FMASK_SLICE
+    0x00000000, // CB_COLOR2_CLEAR_WORD0
+    0x00000000, // CB_COLOR2_CLEAR_WORD1
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // CB_COLOR3_BASE
+    0x00000000, // CB_COLOR3_PITCH
+    0x00000000, // CB_COLOR3_SLICE
+    0x00000000, // CB_COLOR3_VIEW
+    0x00000000, // CB_COLOR3_INFO
+    0x00000000, // CB_COLOR3_ATTRIB
+    0, // HOLE
+    0x00000000, // CB_COLOR3_CMASK
+    0x00000000, // CB_COLOR3_CMASK_SLICE
+    0x00000000, // CB_COLOR3_FMASK
+    0x00000000, // CB_COLOR3_FMASK_SLICE
+    0x00000000, // CB_COLOR3_CLEAR_WORD0
+    0x00000000, // CB_COLOR3_CLEAR_WORD1
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // CB_COLOR4_BASE
+    0x00000000, // CB_COLOR4_PITCH
+    0x00000000, // CB_COLOR4_SLICE
+    0x00000000, // CB_COLOR4_VIEW
+    0x00000000, // CB_COLOR4_INFO
+    0x00000000, // CB_COLOR4_ATTRIB
+    0, // HOLE
+    0x00000000, // CB_COLOR4_CMASK
+    0x00000000, // CB_COLOR4_CMASK_SLICE
+    0x00000000, // CB_COLOR4_FMASK
+    0x00000000, // CB_COLOR4_FMASK_SLICE
+    0x00000000, // CB_COLOR4_CLEAR_WORD0
+    0x00000000, // CB_COLOR4_CLEAR_WORD1
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // CB_COLOR5_BASE
+    0x00000000, // CB_COLOR5_PITCH
+    0x00000000, // CB_COLOR5_SLICE
+    0x00000000, // CB_COLOR5_VIEW
+    0x00000000, // CB_COLOR5_INFO
+    0x00000000, // CB_COLOR5_ATTRIB
+    0, // HOLE
+    0x00000000, // CB_COLOR5_CMASK
+    0x00000000, // CB_COLOR5_CMASK_SLICE
+    0x00000000, // CB_COLOR5_FMASK
+    0x00000000, // CB_COLOR5_FMASK_SLICE
+    0x00000000, // CB_COLOR5_CLEAR_WORD0
+    0x00000000, // CB_COLOR5_CLEAR_WORD1
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // CB_COLOR6_BASE
+    0x00000000, // CB_COLOR6_PITCH
+    0x00000000, // CB_COLOR6_SLICE
+    0x00000000, // CB_COLOR6_VIEW
+    0x00000000, // CB_COLOR6_INFO
+    0x00000000, // CB_COLOR6_ATTRIB
+    0, // HOLE
+    0x00000000, // CB_COLOR6_CMASK
+    0x00000000, // CB_COLOR6_CMASK_SLICE
+    0x00000000, // CB_COLOR6_FMASK
+    0x00000000, // CB_COLOR6_FMASK_SLICE
+    0x00000000, // CB_COLOR6_CLEAR_WORD0
+    0x00000000, // CB_COLOR6_CLEAR_WORD1
+    0, // HOLE
+    0, // HOLE
+    0x00000000, // CB_COLOR7_BASE
+    0x00000000, // CB_COLOR7_PITCH
+    0x00000000, // CB_COLOR7_SLICE
+    0x00000000, // CB_COLOR7_VIEW
+    0x00000000, // CB_COLOR7_INFO
+    0x00000000, // CB_COLOR7_ATTRIB
+    0, // HOLE
+    0x00000000, // CB_COLOR7_CMASK
+    0x00000000, // CB_COLOR7_CMASK_SLICE
+    0x00000000, // CB_COLOR7_FMASK
+    0x00000000, // CB_COLOR7_FMASK_SLICE
+    0x00000000, // CB_COLOR7_CLEAR_WORD0
+    0x00000000, // CB_COLOR7_CLEAR_WORD1
+};
+static const struct cs_extent_def ci_SECT_CONTEXT_defs[] =
+{
+    {ci_SECT_CONTEXT_def_1, 0x0000a000, 212 },
+    {ci_SECT_CONTEXT_def_2, 0x0000a0d6, 274 },
+    {ci_SECT_CONTEXT_def_3, 0x0000a1f5, 6 },
+    {ci_SECT_CONTEXT_def_4, 0x0000a200, 157 },
+    {ci_SECT_CONTEXT_def_5, 0x0000a2a0, 2 },
+    {ci_SECT_CONTEXT_def_6, 0x0000a2a3, 1 },
+    {ci_SECT_CONTEXT_def_7, 0x0000a2a5, 233 },
+    { 0, 0, 0 }
+};
+static const struct cs_section_def ci_cs_data[] = {
+    { ci_SECT_CONTEXT_defs, SECT_CONTEXT },
+    { 0, SECT_NONE }
+};
diff --git a/drivers/gpu/drm/radeon/clearstate_evergreen.h b/drivers/gpu/drm/radeon/clearstate_evergreen.h
index 4791d85..63a1ffb 100644
--- a/drivers/gpu/drm/radeon/clearstate_evergreen.h
+++ b/drivers/gpu/drm/radeon/clearstate_evergreen.h
@@ -1072,7 +1072,7 @@
     {SECT_CTRLCONST_def_1, 0x0000f3fc, 2 },
     { 0, 0, 0 }
 };
-struct cs_section_def evergreen_cs_data[] = {
+static const struct cs_section_def evergreen_cs_data[] = {
     { SECT_CONTEXT_defs, SECT_CONTEXT },
     { SECT_CLEAR_defs, SECT_CLEAR },
     { SECT_CTRLCONST_defs, SECT_CTRLCONST },
diff --git a/drivers/gpu/drm/radeon/cypress_dpm.c b/drivers/gpu/drm/radeon/cypress_dpm.c
index 9bcdd17..95a66db 100644
--- a/drivers/gpu/drm/radeon/cypress_dpm.c
+++ b/drivers/gpu/drm/radeon/cypress_dpm.c
@@ -2038,9 +2038,6 @@
 {
 	struct rv7xx_power_info *pi;
 	struct evergreen_power_info *eg_pi;
-	int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
-	uint16_t data_offset, size;
-	uint8_t frev, crev;
 	struct atom_clock_dividers dividers;
 	int ret;
 
@@ -2092,16 +2089,7 @@
 	eg_pi->vddci_control =
 		radeon_atom_is_voltage_gpio(rdev, SET_VOLTAGE_TYPE_ASIC_VDDCI, 0);
 
-	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
-                                   &frev, &crev, &data_offset)) {
-		pi->sclk_ss = true;
-		pi->mclk_ss = true;
-		pi->dynamic_ss = true;
-	} else {
-		pi->sclk_ss = false;
-		pi->mclk_ss = false;
-		pi->dynamic_ss = true;
-	}
+	rv770_get_engine_memory_ss(rdev);
 
 	pi->asi = RV770_ASI_DFLT;
 	pi->pasi = CYPRESS_HASI_DFLT;
@@ -2122,8 +2110,7 @@
 
 	pi->dynamic_pcie_gen2 = true;
 
-	if (pi->gfx_clock_gating &&
-	    (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE))
+	if (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE)
 		pi->thermal_protection = true;
 	else
 		pi->thermal_protection = false;
@@ -2179,7 +2166,8 @@
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	u32 vblank_time = r600_dpm_get_vblank_time(rdev);
-	u32 switch_limit = pi->mem_gddr5 ? 450 : 300;
+	/* we never hit the non-gddr5 limit so disable it */
+	u32 switch_limit = pi->mem_gddr5 ? 450 : 0;
 
 	if (vblank_time < switch_limit)
 		return true;
diff --git a/drivers/gpu/drm/radeon/dce6_afmt.c b/drivers/gpu/drm/radeon/dce6_afmt.c
new file mode 100644
index 0000000..8953255e
--- /dev/null
+++ b/drivers/gpu/drm/radeon/dce6_afmt.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/hdmi.h>
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "sid.h"
+
+static u32 dce6_endpoint_rreg(struct radeon_device *rdev,
+			      u32 block_offset, u32 reg)
+{
+	u32 r;
+
+	WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+	r = RREG32(AZ_F0_CODEC_ENDPOINT_DATA + block_offset);
+	return r;
+}
+
+static void dce6_endpoint_wreg(struct radeon_device *rdev,
+			       u32 block_offset, u32 reg, u32 v)
+{
+	if (ASIC_IS_DCE8(rdev))
+		WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset, reg);
+	else
+		WREG32(AZ_F0_CODEC_ENDPOINT_INDEX + block_offset,
+		       AZ_ENDPOINT_REG_WRITE_EN | AZ_ENDPOINT_REG_INDEX(reg));
+	WREG32(AZ_F0_CODEC_ENDPOINT_DATA + block_offset, v);
+}
+
+#define RREG32_ENDPOINT(block, reg) dce6_endpoint_rreg(rdev, (block), (reg))
+#define WREG32_ENDPOINT(block, reg, v) dce6_endpoint_wreg(rdev, (block), (reg), (v))
+
+
+static void dce6_afmt_get_connected_pins(struct radeon_device *rdev)
+{
+	int i;
+	u32 offset, tmp;
+
+	for (i = 0; i < rdev->audio.num_pins; i++) {
+		offset = rdev->audio.pin[i].offset;
+		tmp = RREG32_ENDPOINT(offset,
+				      AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT);
+		if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1)
+			rdev->audio.pin[i].connected = false;
+		else
+			rdev->audio.pin[i].connected = true;
+	}
+}
+
+struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev)
+{
+	int i;
+
+	dce6_afmt_get_connected_pins(rdev);
+
+	for (i = 0; i < rdev->audio.num_pins; i++) {
+		if (rdev->audio.pin[i].connected)
+			return &rdev->audio.pin[i];
+	}
+	DRM_ERROR("No connected audio pins found!\n");
+	return NULL;
+}
+
+void dce6_afmt_select_pin(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	u32 offset = dig->afmt->offset;
+	u32 id = dig->afmt->pin->id;
+
+	if (!dig->afmt->pin)
+		return;
+
+	WREG32(AFMT_AUDIO_SRC_CONTROL + offset, AFMT_AUDIO_SRC_SELECT(id));
+}
+
+void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector = NULL;
+	u32 offset, tmp;
+	u8 *sadb;
+	int sad_count;
+
+	if (!dig->afmt->pin)
+		return;
+
+	offset = dig->afmt->pin->offset;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder)
+			radeon_connector = to_radeon_connector(connector);
+	}
+
+	if (!radeon_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		return;
+	}
+
+	/* program the speaker allocation */
+	tmp = RREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER);
+	tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK);
+	/* set HDMI mode */
+	tmp |= HDMI_CONNECTION;
+	if (sad_count)
+		tmp |= SPEAKER_ALLOCATION(sadb[0]);
+	else
+		tmp |= SPEAKER_ALLOCATION(5); /* stereo */
+	WREG32_ENDPOINT(offset, AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp);
+
+	kfree(sadb);
+}
+
+void dce6_afmt_write_sad_regs(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
+	u32 offset;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector = NULL;
+	struct cea_sad *sads;
+	int i, sad_count;
+
+	static const u16 eld_reg_to_type[][2] = {
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+		{ AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+	};
+
+	if (!dig->afmt->pin)
+		return;
+
+	offset = dig->afmt->pin->offset;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder)
+			radeon_connector = to_radeon_connector(connector);
+	}
+
+	if (!radeon_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_sad(radeon_connector->edid, &sads);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+		return;
+	}
+	BUG_ON(!sads);
+
+	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+		u32 value = 0;
+		int j;
+
+		for (j = 0; j < sad_count; j++) {
+			struct cea_sad *sad = &sads[j];
+
+			if (sad->format == eld_reg_to_type[i][1]) {
+				value = MAX_CHANNELS(sad->channels) |
+					DESCRIPTOR_BYTE_2(sad->byte2) |
+					SUPPORTED_FREQUENCIES(sad->freq);
+				if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+					value |= SUPPORTED_FREQUENCIES_STEREO(sad->freq);
+				break;
+			}
+		}
+		WREG32_ENDPOINT(offset, eld_reg_to_type[i][0], value);
+	}
+
+	kfree(sads);
+}
+
+static int dce6_audio_chipset_supported(struct radeon_device *rdev)
+{
+	return !ASIC_IS_NODCE(rdev);
+}
+
+static void dce6_audio_enable(struct radeon_device *rdev,
+			      struct r600_audio_pin *pin,
+			      bool enable)
+{
+	WREG32_ENDPOINT(pin->offset, AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL,
+			AUDIO_ENABLED);
+	DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id);
+}
+
+static const u32 pin_offsets[7] =
+{
+	(0x5e00 - 0x5e00),
+	(0x5e18 - 0x5e00),
+	(0x5e30 - 0x5e00),
+	(0x5e48 - 0x5e00),
+	(0x5e60 - 0x5e00),
+	(0x5e78 - 0x5e00),
+	(0x5e90 - 0x5e00),
+};
+
+int dce6_audio_init(struct radeon_device *rdev)
+{
+	int i;
+
+	if (!radeon_audio || !dce6_audio_chipset_supported(rdev))
+		return 0;
+
+	rdev->audio.enabled = true;
+
+	if (ASIC_IS_DCE8(rdev))
+		rdev->audio.num_pins = 7;
+	else
+		rdev->audio.num_pins = 6;
+
+	for (i = 0; i < rdev->audio.num_pins; i++) {
+		rdev->audio.pin[i].channels = -1;
+		rdev->audio.pin[i].rate = -1;
+		rdev->audio.pin[i].bits_per_sample = -1;
+		rdev->audio.pin[i].status_bits = 0;
+		rdev->audio.pin[i].category_code = 0;
+		rdev->audio.pin[i].connected = false;
+		rdev->audio.pin[i].offset = pin_offsets[i];
+		rdev->audio.pin[i].id = i;
+		dce6_audio_enable(rdev, &rdev->audio.pin[i], true);
+	}
+
+	return 0;
+}
+
+void dce6_audio_fini(struct radeon_device *rdev)
+{
+	int i;
+
+	if (!rdev->audio.enabled)
+		return;
+
+	for (i = 0; i < rdev->audio.num_pins; i++)
+		dce6_audio_enable(rdev, &rdev->audio.pin[i], false);
+
+	rdev->audio.enabled = false;
+}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 038dcac..555164e 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -47,7 +47,7 @@
 
 #include "clearstate_evergreen.h"
 
-static u32 sumo_rlc_save_restore_register_list[] =
+static const u32 sumo_rlc_save_restore_register_list[] =
 {
 	0x98fc,
 	0x9830,
@@ -131,7 +131,6 @@
 	0x9150,
 	0x802c,
 };
-static u32 sumo_rlc_save_restore_register_list_size = ARRAY_SIZE(sumo_rlc_save_restore_register_list);
 
 static void evergreen_gpu_init(struct radeon_device *rdev);
 void evergreen_fini(struct radeon_device *rdev);
@@ -141,6 +140,12 @@
 				     int ring, u32 cp_int_cntl);
 extern void cayman_vm_decode_fault(struct radeon_device *rdev,
 				   u32 status, u32 addr);
+void cik_init_cp_pg_table(struct radeon_device *rdev);
+
+extern u32 si_get_csb_size(struct radeon_device *rdev);
+extern void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer);
+extern u32 cik_get_csb_size(struct radeon_device *rdev);
+extern void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer);
 
 static const u32 evergreen_golden_registers[] =
 {
@@ -1807,7 +1812,8 @@
 					struct drm_display_mode *mode,
 					struct drm_display_mode *other_mode)
 {
-	u32 tmp;
+	u32 tmp, buffer_alloc, i;
+	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
 	/*
 	 * Line Buffer Setup
 	 * There are 3 line buffers, each one shared by 2 display controllers.
@@ -1830,18 +1836,34 @@
 	 * non-linked crtcs for maximum line buffer allocation.
 	 */
 	if (radeon_crtc->base.enabled && mode) {
-		if (other_mode)
+		if (other_mode) {
 			tmp = 0; /* 1/2 */
-		else
+			buffer_alloc = 1;
+		} else {
 			tmp = 2; /* whole */
-	} else
+			buffer_alloc = 2;
+		}
+	} else {
 		tmp = 0;
+		buffer_alloc = 0;
+	}
 
 	/* second controller of the pair uses second half of the lb */
 	if (radeon_crtc->crtc_id % 2)
 		tmp += 4;
 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset, tmp);
 
+	if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) {
+		WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+		       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
+		for (i = 0; i < rdev->usec_timeout; i++) {
+			if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+			    DMIF_BUFFERS_ALLOCATED_COMPLETED)
+				break;
+			udelay(1);
+		}
+	}
+
 	if (radeon_crtc->base.enabled && mode) {
 		switch (tmp) {
 		case 0:
@@ -2881,8 +2903,8 @@
 	RREG32(GRBM_SOFT_RESET);
 
 	/* Set ring buffer size */
-	rb_bufsz = drm_order(ring->ring_size / 8);
-	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+	rb_bufsz = order_base_2(ring->ring_size / 8);
+	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
 #endif
@@ -3613,7 +3635,7 @@
 	return true;
 }
 
-static u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev)
+u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev)
 {
 	u32 reset_mask = 0;
 	u32 tmp;
@@ -3839,28 +3861,6 @@
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
-/**
- * evergreen_dma_is_lockup - Check if the DMA engine is locked up
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Check if the async DMA engine is locked up.
- * Returns true if the engine appears to be locked up, false if not.
- */
-bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	u32 reset_mask = evergreen_gpu_check_soft_reset(rdev);
-
-	if (!(reset_mask & RADEON_RESET_DMA)) {
-		radeon_ring_lockup_update(ring);
-		return false;
-	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
-	return radeon_ring_test_lockup(rdev, ring);
-}
-
 /*
  * RLC
  */
@@ -3894,147 +3894,231 @@
 		radeon_bo_unref(&rdev->rlc.clear_state_obj);
 		rdev->rlc.clear_state_obj = NULL;
 	}
+
+	/* clear state block */
+	if (rdev->rlc.cp_table_obj) {
+		r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false);
+		if (unlikely(r != 0))
+			dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+		radeon_bo_unpin(rdev->rlc.cp_table_obj);
+		radeon_bo_unreserve(rdev->rlc.cp_table_obj);
+
+		radeon_bo_unref(&rdev->rlc.cp_table_obj);
+		rdev->rlc.cp_table_obj = NULL;
+	}
 }
 
+#define CP_ME_TABLE_SIZE    96
+
 int sumo_rlc_init(struct radeon_device *rdev)
 {
-	u32 *src_ptr;
+	const u32 *src_ptr;
 	volatile u32 *dst_ptr;
 	u32 dws, data, i, j, k, reg_num;
-	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
+	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index = 0;
 	u64 reg_list_mc_addr;
-	struct cs_section_def *cs_data;
+	const struct cs_section_def *cs_data;
 	int r;
 
 	src_ptr = rdev->rlc.reg_list;
 	dws = rdev->rlc.reg_list_size;
+	if (rdev->family >= CHIP_BONAIRE) {
+		dws += (5 * 16) + 48 + 48 + 64;
+	}
 	cs_data = rdev->rlc.cs_data;
 
-	/* save restore block */
-	if (rdev->rlc.save_restore_obj == NULL) {
-		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj);
-		if (r) {
-			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
-			return r;
+	if (src_ptr) {
+		/* save restore block */
+		if (rdev->rlc.save_restore_obj == NULL) {
+			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
+					     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj);
+			if (r) {
+				dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
+				return r;
+			}
 		}
-	}
 
-	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
-	if (unlikely(r != 0)) {
-		sumo_rlc_fini(rdev);
-		return r;
-	}
-	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
-			  &rdev->rlc.save_restore_gpu_addr);
-	if (r) {
-		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
-		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
-		sumo_rlc_fini(rdev);
-		return r;
-	}
-	r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
-	if (r) {
-		dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
-		sumo_rlc_fini(rdev);
-		return r;
-	}
-	/* write the sr buffer */
-	dst_ptr = rdev->rlc.sr_ptr;
-	/* format:
-	 * dw0: (reg2 << 16) | reg1
-	 * dw1: reg1 save space
-	 * dw2: reg2 save space
-	 */
-	for (i = 0; i < dws; i++) {
-		data = src_ptr[i] >> 2;
-		i++;
-		if (i < dws)
-			data |= (src_ptr[i] >> 2) << 16;
-		j = (((i - 1) * 3) / 2);
-		dst_ptr[j] = data;
-	}
-	j = ((i * 3) / 2);
-	dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER;
-
-	radeon_bo_kunmap(rdev->rlc.save_restore_obj);
-	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
-
-	/* clear state block */
-	reg_list_num = 0;
-	dws = 0;
-	for (i = 0; cs_data[i].section != NULL; i++) {
-		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
-			reg_list_num++;
-			dws += cs_data[i].section[j].reg_count;
-		}
-	}
-	reg_list_blk_index = (3 * reg_list_num + 2);
-	dws += reg_list_blk_index;
-
-	if (rdev->rlc.clear_state_obj == NULL) {
-		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
-		if (r) {
-			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
+		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
+		if (unlikely(r != 0)) {
 			sumo_rlc_fini(rdev);
 			return r;
 		}
-	}
-	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
-	if (unlikely(r != 0)) {
-		sumo_rlc_fini(rdev);
-		return r;
-	}
-	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
-			  &rdev->rlc.clear_state_gpu_addr);
-	if (r) {
-
-		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
-		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
-		sumo_rlc_fini(rdev);
-		return r;
-	}
-	r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
-	if (r) {
-		dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
-		sumo_rlc_fini(rdev);
-		return r;
-	}
-	/* set up the cs buffer */
-	dst_ptr = rdev->rlc.cs_ptr;
-	reg_list_hdr_blk_index = 0;
-	reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
-	data = upper_32_bits(reg_list_mc_addr);
-	dst_ptr[reg_list_hdr_blk_index] = data;
-	reg_list_hdr_blk_index++;
-	for (i = 0; cs_data[i].section != NULL; i++) {
-		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
-			reg_num = cs_data[i].section[j].reg_count;
-			data = reg_list_mc_addr & 0xffffffff;
-			dst_ptr[reg_list_hdr_blk_index] = data;
-			reg_list_hdr_blk_index++;
-
-			data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
-			dst_ptr[reg_list_hdr_blk_index] = data;
-			reg_list_hdr_blk_index++;
-
-			data = 0x08000000 | (reg_num * 4);
-			dst_ptr[reg_list_hdr_blk_index] = data;
-			reg_list_hdr_blk_index++;
-
-			for (k = 0; k < reg_num; k++) {
-				data = cs_data[i].section[j].extent[k];
-				dst_ptr[reg_list_blk_index + k] = data;
-			}
-			reg_list_mc_addr += reg_num * 4;
-			reg_list_blk_index += reg_num;
+		r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
+				  &rdev->rlc.save_restore_gpu_addr);
+		if (r) {
+			radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+			dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
+			sumo_rlc_fini(rdev);
+			return r;
 		}
-	}
-	dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
 
-	radeon_bo_kunmap(rdev->rlc.clear_state_obj);
-	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+		r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
+		if (r) {
+			dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
+			sumo_rlc_fini(rdev);
+			return r;
+		}
+		/* write the sr buffer */
+		dst_ptr = rdev->rlc.sr_ptr;
+		if (rdev->family >= CHIP_TAHITI) {
+			/* SI */
+			for (i = 0; i < rdev->rlc.reg_list_size; i++)
+				dst_ptr[i] = src_ptr[i];
+		} else {
+			/* ON/LN/TN */
+			/* format:
+			 * dw0: (reg2 << 16) | reg1
+			 * dw1: reg1 save space
+			 * dw2: reg2 save space
+			 */
+			for (i = 0; i < dws; i++) {
+				data = src_ptr[i] >> 2;
+				i++;
+				if (i < dws)
+					data |= (src_ptr[i] >> 2) << 16;
+				j = (((i - 1) * 3) / 2);
+				dst_ptr[j] = data;
+			}
+			j = ((i * 3) / 2);
+			dst_ptr[j] = RLC_SAVE_RESTORE_LIST_END_MARKER;
+		}
+		radeon_bo_kunmap(rdev->rlc.save_restore_obj);
+		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
+	}
+
+	if (cs_data) {
+		/* clear state block */
+		if (rdev->family >= CHIP_BONAIRE) {
+			rdev->rlc.clear_state_size = dws = cik_get_csb_size(rdev);
+		} else if (rdev->family >= CHIP_TAHITI) {
+			rdev->rlc.clear_state_size = si_get_csb_size(rdev);
+			dws = rdev->rlc.clear_state_size + (256 / 4);
+		} else {
+			reg_list_num = 0;
+			dws = 0;
+			for (i = 0; cs_data[i].section != NULL; i++) {
+				for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
+					reg_list_num++;
+					dws += cs_data[i].section[j].reg_count;
+				}
+			}
+			reg_list_blk_index = (3 * reg_list_num + 2);
+			dws += reg_list_blk_index;
+			rdev->rlc.clear_state_size = dws;
+		}
+
+		if (rdev->rlc.clear_state_obj == NULL) {
+			r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
+					     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
+			if (r) {
+				dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
+				sumo_rlc_fini(rdev);
+				return r;
+			}
+		}
+		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
+		if (unlikely(r != 0)) {
+			sumo_rlc_fini(rdev);
+			return r;
+		}
+		r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
+				  &rdev->rlc.clear_state_gpu_addr);
+		if (r) {
+			radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+			dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
+			sumo_rlc_fini(rdev);
+			return r;
+		}
+
+		r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
+		if (r) {
+			dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
+			sumo_rlc_fini(rdev);
+			return r;
+		}
+		/* set up the cs buffer */
+		dst_ptr = rdev->rlc.cs_ptr;
+		if (rdev->family >= CHIP_BONAIRE) {
+			cik_get_csb_buffer(rdev, dst_ptr);
+		} else if (rdev->family >= CHIP_TAHITI) {
+			reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + 256;
+			dst_ptr[0] = upper_32_bits(reg_list_mc_addr);
+			dst_ptr[1] = lower_32_bits(reg_list_mc_addr);
+			dst_ptr[2] = rdev->rlc.clear_state_size;
+			si_get_csb_buffer(rdev, &dst_ptr[(256/4)]);
+		} else {
+			reg_list_hdr_blk_index = 0;
+			reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
+			data = upper_32_bits(reg_list_mc_addr);
+			dst_ptr[reg_list_hdr_blk_index] = data;
+			reg_list_hdr_blk_index++;
+			for (i = 0; cs_data[i].section != NULL; i++) {
+				for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
+					reg_num = cs_data[i].section[j].reg_count;
+					data = reg_list_mc_addr & 0xffffffff;
+					dst_ptr[reg_list_hdr_blk_index] = data;
+					reg_list_hdr_blk_index++;
+
+					data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
+					dst_ptr[reg_list_hdr_blk_index] = data;
+					reg_list_hdr_blk_index++;
+
+					data = 0x08000000 | (reg_num * 4);
+					dst_ptr[reg_list_hdr_blk_index] = data;
+					reg_list_hdr_blk_index++;
+
+					for (k = 0; k < reg_num; k++) {
+						data = cs_data[i].section[j].extent[k];
+						dst_ptr[reg_list_blk_index + k] = data;
+					}
+					reg_list_mc_addr += reg_num * 4;
+					reg_list_blk_index += reg_num;
+				}
+			}
+			dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
+		}
+		radeon_bo_kunmap(rdev->rlc.clear_state_obj);
+		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
+	}
+
+	if (rdev->rlc.cp_table_size) {
+		if (rdev->rlc.cp_table_obj == NULL) {
+			r = radeon_bo_create(rdev, rdev->rlc.cp_table_size, PAGE_SIZE, true,
+					     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.cp_table_obj);
+			if (r) {
+				dev_warn(rdev->dev, "(%d) create RLC cp table bo failed\n", r);
+				sumo_rlc_fini(rdev);
+				return r;
+			}
+		}
+
+		r = radeon_bo_reserve(rdev->rlc.cp_table_obj, false);
+		if (unlikely(r != 0)) {
+			dev_warn(rdev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+			sumo_rlc_fini(rdev);
+			return r;
+		}
+		r = radeon_bo_pin(rdev->rlc.cp_table_obj, RADEON_GEM_DOMAIN_VRAM,
+				  &rdev->rlc.cp_table_gpu_addr);
+		if (r) {
+			radeon_bo_unreserve(rdev->rlc.cp_table_obj);
+			dev_warn(rdev->dev, "(%d) pin RLC cp_table bo failed\n", r);
+			sumo_rlc_fini(rdev);
+			return r;
+		}
+		r = radeon_bo_kmap(rdev->rlc.cp_table_obj, (void **)&rdev->rlc.cp_table_ptr);
+		if (r) {
+			dev_warn(rdev->dev, "(%d) map RLC cp table bo failed\n", r);
+			sumo_rlc_fini(rdev);
+			return r;
+		}
+
+		cik_init_cp_pg_table(rdev);
+
+		radeon_bo_kunmap(rdev->rlc.cp_table_obj);
+		radeon_bo_unreserve(rdev->rlc.cp_table_obj);
+
+	}
 
 	return 0;
 }
@@ -4959,143 +5043,6 @@
 	return IRQ_HANDLED;
 }
 
-/**
- * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
- *
- * @rdev: radeon_device pointer
- * @fence: radeon fence object
- *
- * Add a DMA fence packet to the ring to write
- * the fence seq number and DMA trap packet to generate
- * an interrupt if needed (evergreen-SI).
- */
-void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
-				   struct radeon_fence *fence)
-{
-	struct radeon_ring *ring = &rdev->ring[fence->ring];
-	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
-	/* write the fence */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
-	radeon_ring_write(ring, addr & 0xfffffffc);
-	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
-	radeon_ring_write(ring, fence->seq);
-	/* generate an interrupt */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
-	/* flush HDP */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
-	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
-	radeon_ring_write(ring, 1);
-}
-
-/**
- * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
- *
- * @rdev: radeon_device pointer
- * @ib: IB object to schedule
- *
- * Schedule an IB in the DMA ring (evergreen).
- */
-void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
-				   struct radeon_ib *ib)
-{
-	struct radeon_ring *ring = &rdev->ring[ib->ring];
-
-	if (rdev->wb.enabled) {
-		u32 next_rptr = ring->wptr + 4;
-		while ((next_rptr & 7) != 5)
-			next_rptr++;
-		next_rptr += 3;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
-		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
-		radeon_ring_write(ring, next_rptr);
-	}
-
-	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
-	 * Pad as necessary with NOPs.
-	 */
-	while ((ring->wptr & 7) != 5)
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
-	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
-	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
-
-}
-
-/**
- * evergreen_copy_dma - copy pages using the DMA engine
- *
- * @rdev: radeon_device pointer
- * @src_offset: src GPU address
- * @dst_offset: dst GPU address
- * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
- *
- * Copy GPU paging using the DMA engine (evergreen-cayman).
- * Used by the radeon ttm implementation to move pages if
- * registered as the asic copy callback.
- */
-int evergreen_copy_dma(struct radeon_device *rdev,
-		       uint64_t src_offset, uint64_t dst_offset,
-		       unsigned num_gpu_pages,
-		       struct radeon_fence **fence)
-{
-	struct radeon_semaphore *sem = NULL;
-	int ring_index = rdev->asic->copy.dma_ring_index;
-	struct radeon_ring *ring = &rdev->ring[ring_index];
-	u32 size_in_dw, cur_size_in_dw;
-	int i, num_loops;
-	int r = 0;
-
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
-	}
-
-	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
-	num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
-	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
-	}
-
-	if (radeon_fence_need_sync(*fence, ring->idx)) {
-		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-					    ring->idx);
-		radeon_fence_note_sync(*fence, ring->idx);
-	} else {
-		radeon_semaphore_free(rdev, &sem, NULL);
-	}
-
-	for (i = 0; i < num_loops; i++) {
-		cur_size_in_dw = size_in_dw;
-		if (cur_size_in_dw > 0xFFFFF)
-			cur_size_in_dw = 0xFFFFF;
-		size_in_dw -= cur_size_in_dw;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw));
-		radeon_ring_write(ring, dst_offset & 0xfffffffc);
-		radeon_ring_write(ring, src_offset & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
-		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
-		src_offset += cur_size_in_dw * 4;
-		dst_offset += cur_size_in_dw * 4;
-	}
-
-	r = radeon_fence_emit(rdev, fence, ring->idx);
-	if (r) {
-		radeon_ring_unlock_undo(rdev, ring);
-		return r;
-	}
-
-	radeon_ring_unlock_commit(rdev, ring);
-	radeon_semaphore_free(rdev, &sem, *fence);
-
-	return r;
-}
-
 static int evergreen_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring;
@@ -5106,6 +5053,13 @@
 	/* enable aspm */
 	evergreen_program_aspm(rdev);
 
+	/* scratch needs to be initialized before MC */
+	r = r600_vram_scratch_init(rdev);
+	if (r)
+		return r;
+
+	evergreen_mc_program(rdev);
+
 	if (ASIC_IS_DCE5(rdev)) {
 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
 			r = ni_init_microcode(rdev);
@@ -5129,11 +5083,6 @@
 		}
 	}
 
-	r = r600_vram_scratch_init(rdev);
-	if (r)
-		return r;
-
-	evergreen_mc_program(rdev);
 	if (rdev->flags & RADEON_IS_AGP) {
 		evergreen_agp_enable(rdev);
 	} else {
@@ -5143,17 +5092,11 @@
 	}
 	evergreen_gpu_init(rdev);
 
-	r = evergreen_blit_init(rdev);
-	if (r) {
-		r600_blit_fini(rdev);
-		rdev->asic->copy.copy = NULL;
-		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-	}
-
 	/* allocate rlc buffers */
 	if (rdev->flags & RADEON_IS_IGP) {
 		rdev->rlc.reg_list = sumo_rlc_save_restore_register_list;
-		rdev->rlc.reg_list_size = sumo_rlc_save_restore_register_list_size;
+		rdev->rlc.reg_list_size =
+			(u32)ARRAY_SIZE(sumo_rlc_save_restore_register_list);
 		rdev->rlc.cs_data = evergreen_cs_data;
 		r = sumo_rlc_init(rdev);
 		if (r) {
@@ -5179,7 +5122,7 @@
 		return r;
 	}
 
-	r = rv770_uvd_resume(rdev);
+	r = uvd_v2_2_resume(rdev);
 	if (!r) {
 		r = radeon_fence_driver_start_ring(rdev,
 						   R600_RING_TYPE_UVD_INDEX);
@@ -5208,14 +5151,14 @@
 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
-			     0, 0xfffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r)
 		return r;
 
 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
 			     DMA_RB_RPTR, DMA_RB_WPTR,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
+			     DMA_PACKET(DMA_PACKET_NOP, 0, 0));
 	if (r)
 		return r;
 
@@ -5231,12 +5174,11 @@
 
 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
 	if (ring->ring_size) {
-		r = radeon_ring_init(rdev, ring, ring->ring_size,
-				     R600_WB_UVD_RPTR_OFFSET,
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-				     0, 0xfffff, RADEON_CP_PACKET2);
+				     RADEON_CP_PACKET2);
 		if (!r)
-			r = r600_uvd_init(rdev);
+			r = uvd_v1_0_init(rdev);
 
 		if (r)
 			DRM_ERROR("radeon: error initializing UVD (%d).\n", r);
@@ -5291,10 +5233,10 @@
 int evergreen_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
+	uvd_v1_0_fini(rdev);
 	radeon_uvd_suspend(rdev);
 	r700_cp_stop(rdev);
 	r600_dma_stop(rdev);
-	r600_uvd_rbc_stop(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
@@ -5419,7 +5361,6 @@
 void evergreen_fini(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
-	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
 	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
@@ -5429,6 +5370,7 @@
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	evergreen_pcie_gart_fini(rdev);
+	uvd_v1_0_fini(rdev);
 	radeon_uvd_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
deleted file mode 100644
index 057c87b..0000000
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ /dev/null
@@ -1,729 +0,0 @@
-/*
- * Copyright 2010 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Authors:
- *     Alex Deucher <alexander.deucher@amd.com>
- */
-
-#include <drm/drmP.h>
-#include <drm/radeon_drm.h>
-#include "radeon.h"
-
-#include "evergreend.h"
-#include "evergreen_blit_shaders.h"
-#include "cayman_blit_shaders.h"
-#include "radeon_blit_common.h"
-
-/* emits 17 */
-static void
-set_render_target(struct radeon_device *rdev, int format,
-		  int w, int h, u64 gpu_addr)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 cb_color_info;
-	int pitch, slice;
-
-	h = ALIGN(h, 8);
-	if (h < 8)
-		h = 8;
-
-	cb_color_info = CB_FORMAT(format) |
-		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
-		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
-	pitch = (w / 8) - 1;
-	slice = ((w * h) / 64) - 1;
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
-	radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(ring, gpu_addr >> 8);
-	radeon_ring_write(ring, pitch);
-	radeon_ring_write(ring, slice);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, cb_color_info);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, (w - 1) | ((h - 1) << 16));
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-}
-
-/* emits 5dw */
-static void
-cp_set_surface_sync(struct radeon_device *rdev,
-		    u32 sync_type, u32 size,
-		    u64 mc_addr)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 cp_coher_size;
-
-	if (size == 0xffffffff)
-		cp_coher_size = 0xffffffff;
-	else
-		cp_coher_size = ((size + 255) >> 8);
-
-	if (rdev->family >= CHIP_CAYMAN) {
-		/* CP_COHER_CNTL2 has to be set manually when submitting a surface_sync
-		 * to the RB directly. For IBs, the CP programs this as part of the
-		 * surface_sync packet.
-		 */
-		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(ring, (0x85e8 - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(ring, 0); /* CP_COHER_CNTL2 */
-	}
-	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
-	radeon_ring_write(ring, sync_type);
-	radeon_ring_write(ring, cp_coher_size);
-	radeon_ring_write(ring, mc_addr >> 8);
-	radeon_ring_write(ring, 10); /* poll interval */
-}
-
-/* emits 11dw + 1 surface sync = 16dw */
-static void
-set_shaders(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u64 gpu_addr;
-
-	/* VS */
-	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
-	radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(ring, gpu_addr >> 8);
-	radeon_ring_write(ring, 2);
-	radeon_ring_write(ring, 0);
-
-	/* PS */
-	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
-	radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(ring, gpu_addr >> 8);
-	radeon_ring_write(ring, 1);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 2);
-
-	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
-	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
-}
-
-/* emits 10 + 1 sync (5) = 15 */
-static void
-set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
-
-	/* high addr, stride */
-	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
-		SQ_VTXC_STRIDE(16);
-#ifdef __BIG_ENDIAN
-	sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
-#endif
-	/* xyzw swizzles */
-	sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) |
-		SQ_VTCX_SEL_Y(SQ_SEL_Y) |
-		SQ_VTCX_SEL_Z(SQ_SEL_Z) |
-		SQ_VTCX_SEL_W(SQ_SEL_W);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8));
-	radeon_ring_write(ring, 0x580);
-	radeon_ring_write(ring, gpu_addr & 0xffffffff);
-	radeon_ring_write(ring, 48 - 1); /* size */
-	radeon_ring_write(ring, sq_vtx_constant_word2);
-	radeon_ring_write(ring, sq_vtx_constant_word3);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));
-
-	if ((rdev->family == CHIP_CEDAR) ||
-	    (rdev->family == CHIP_PALM) ||
-	    (rdev->family == CHIP_SUMO) ||
-	    (rdev->family == CHIP_SUMO2) ||
-	    (rdev->family == CHIP_CAICOS))
-		cp_set_surface_sync(rdev,
-				    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
-	else
-		cp_set_surface_sync(rdev,
-				    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
-
-}
-
-/* emits 10 */
-static void
-set_tex_resource(struct radeon_device *rdev,
-		 int format, int w, int h, int pitch,
-		 u64 gpu_addr, u32 size)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 sq_tex_resource_word0, sq_tex_resource_word1;
-	u32 sq_tex_resource_word4, sq_tex_resource_word7;
-
-	if (h < 1)
-		h = 1;
-
-	sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D);
-	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
-				  ((w - 1) << 18));
-	sq_tex_resource_word1 = ((h - 1) << 0) |
-				TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
-	/* xyzw swizzles */
-	sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) |
-				TEX_DST_SEL_Y(SQ_SEL_Y) |
-				TEX_DST_SEL_Z(SQ_SEL_Z) |
-				TEX_DST_SEL_W(SQ_SEL_W);
-
-	sq_tex_resource_word7 = format |
-		S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);
-
-	cp_set_surface_sync(rdev,
-			    PACKET3_TC_ACTION_ENA, size, gpu_addr);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 8));
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, sq_tex_resource_word0);
-	radeon_ring_write(ring, sq_tex_resource_word1);
-	radeon_ring_write(ring, gpu_addr >> 8);
-	radeon_ring_write(ring, gpu_addr >> 8);
-	radeon_ring_write(ring, sq_tex_resource_word4);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, sq_tex_resource_word7);
-}
-
-/* emits 12 */
-static void
-set_scissors(struct radeon_device *rdev, int x1, int y1,
-	     int x2, int y2)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	/* workaround some hw bugs */
-	if (x2 == 0)
-		x1 = 1;
-	if (y2 == 0)
-		y1 = 1;
-	if (rdev->family >= CHIP_CAYMAN) {
-		if ((x2 == 1) && (y2 == 1))
-			x2 = 2;
-	}
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(ring, (x1 << 0) | (y1 << 16));
-	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
-	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
-	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
-	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
-}
-
-/* emits 10 */
-static void
-draw_auto(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(ring, DI_PT_RECTLIST);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0));
-	radeon_ring_write(ring,
-#ifdef __BIG_ENDIAN
-			  (2 << 2) |
-#endif
-			  DI_INDEX_SIZE_16_BIT);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0));
-	radeon_ring_write(ring, 1);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
-	radeon_ring_write(ring, 3);
-	radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX);
-
-}
-
-/* emits 39 */
-static void
-set_default_state(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
-	u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
-	u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
-	int num_ps_gprs, num_vs_gprs, num_temp_gprs;
-	int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs;
-	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
-	int num_hs_threads, num_ls_threads;
-	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
-	int num_hs_stack_entries, num_ls_stack_entries;
-	u64 gpu_addr;
-	int dwords;
-
-	/* set clear context state */
-	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
-	radeon_ring_write(ring, 0);
-
-	if (rdev->family < CHIP_CAYMAN) {
-		switch (rdev->family) {
-		case CHIP_CEDAR:
-		default:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 96;
-			num_vs_threads = 16;
-			num_gs_threads = 16;
-			num_es_threads = 16;
-			num_hs_threads = 16;
-			num_ls_threads = 16;
-			num_ps_stack_entries = 42;
-			num_vs_stack_entries = 42;
-			num_gs_stack_entries = 42;
-			num_es_stack_entries = 42;
-			num_hs_stack_entries = 42;
-			num_ls_stack_entries = 42;
-			break;
-		case CHIP_REDWOOD:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 128;
-			num_vs_threads = 20;
-			num_gs_threads = 20;
-			num_es_threads = 20;
-			num_hs_threads = 20;
-			num_ls_threads = 20;
-			num_ps_stack_entries = 42;
-			num_vs_stack_entries = 42;
-			num_gs_stack_entries = 42;
-			num_es_stack_entries = 42;
-			num_hs_stack_entries = 42;
-			num_ls_stack_entries = 42;
-			break;
-		case CHIP_JUNIPER:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 128;
-			num_vs_threads = 20;
-			num_gs_threads = 20;
-			num_es_threads = 20;
-			num_hs_threads = 20;
-			num_ls_threads = 20;
-			num_ps_stack_entries = 85;
-			num_vs_stack_entries = 85;
-			num_gs_stack_entries = 85;
-			num_es_stack_entries = 85;
-			num_hs_stack_entries = 85;
-			num_ls_stack_entries = 85;
-			break;
-		case CHIP_CYPRESS:
-		case CHIP_HEMLOCK:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 128;
-			num_vs_threads = 20;
-			num_gs_threads = 20;
-			num_es_threads = 20;
-			num_hs_threads = 20;
-			num_ls_threads = 20;
-			num_ps_stack_entries = 85;
-			num_vs_stack_entries = 85;
-			num_gs_stack_entries = 85;
-			num_es_stack_entries = 85;
-			num_hs_stack_entries = 85;
-			num_ls_stack_entries = 85;
-			break;
-		case CHIP_PALM:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 96;
-			num_vs_threads = 16;
-			num_gs_threads = 16;
-			num_es_threads = 16;
-			num_hs_threads = 16;
-			num_ls_threads = 16;
-			num_ps_stack_entries = 42;
-			num_vs_stack_entries = 42;
-			num_gs_stack_entries = 42;
-			num_es_stack_entries = 42;
-			num_hs_stack_entries = 42;
-			num_ls_stack_entries = 42;
-			break;
-		case CHIP_SUMO:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 96;
-			num_vs_threads = 25;
-			num_gs_threads = 25;
-			num_es_threads = 25;
-			num_hs_threads = 25;
-			num_ls_threads = 25;
-			num_ps_stack_entries = 42;
-			num_vs_stack_entries = 42;
-			num_gs_stack_entries = 42;
-			num_es_stack_entries = 42;
-			num_hs_stack_entries = 42;
-			num_ls_stack_entries = 42;
-			break;
-		case CHIP_SUMO2:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 96;
-			num_vs_threads = 25;
-			num_gs_threads = 25;
-			num_es_threads = 25;
-			num_hs_threads = 25;
-			num_ls_threads = 25;
-			num_ps_stack_entries = 85;
-			num_vs_stack_entries = 85;
-			num_gs_stack_entries = 85;
-			num_es_stack_entries = 85;
-			num_hs_stack_entries = 85;
-			num_ls_stack_entries = 85;
-			break;
-		case CHIP_BARTS:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 128;
-			num_vs_threads = 20;
-			num_gs_threads = 20;
-			num_es_threads = 20;
-			num_hs_threads = 20;
-			num_ls_threads = 20;
-			num_ps_stack_entries = 85;
-			num_vs_stack_entries = 85;
-			num_gs_stack_entries = 85;
-			num_es_stack_entries = 85;
-			num_hs_stack_entries = 85;
-			num_ls_stack_entries = 85;
-			break;
-		case CHIP_TURKS:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 128;
-			num_vs_threads = 20;
-			num_gs_threads = 20;
-			num_es_threads = 20;
-			num_hs_threads = 20;
-			num_ls_threads = 20;
-			num_ps_stack_entries = 42;
-			num_vs_stack_entries = 42;
-			num_gs_stack_entries = 42;
-			num_es_stack_entries = 42;
-			num_hs_stack_entries = 42;
-			num_ls_stack_entries = 42;
-			break;
-		case CHIP_CAICOS:
-			num_ps_gprs = 93;
-			num_vs_gprs = 46;
-			num_temp_gprs = 4;
-			num_gs_gprs = 31;
-			num_es_gprs = 31;
-			num_hs_gprs = 23;
-			num_ls_gprs = 23;
-			num_ps_threads = 128;
-			num_vs_threads = 10;
-			num_gs_threads = 10;
-			num_es_threads = 10;
-			num_hs_threads = 10;
-			num_ls_threads = 10;
-			num_ps_stack_entries = 42;
-			num_vs_stack_entries = 42;
-			num_gs_stack_entries = 42;
-			num_es_stack_entries = 42;
-			num_hs_stack_entries = 42;
-			num_ls_stack_entries = 42;
-			break;
-		}
-
-		if ((rdev->family == CHIP_CEDAR) ||
-		    (rdev->family == CHIP_PALM) ||
-		    (rdev->family == CHIP_SUMO) ||
-		    (rdev->family == CHIP_SUMO2) ||
-		    (rdev->family == CHIP_CAICOS))
-			sq_config = 0;
-		else
-			sq_config = VC_ENABLE;
-
-		sq_config |= (EXPORT_SRC_C |
-			      CS_PRIO(0) |
-			      LS_PRIO(0) |
-			      HS_PRIO(0) |
-			      PS_PRIO(0) |
-			      VS_PRIO(1) |
-			      GS_PRIO(2) |
-			      ES_PRIO(3));
-
-		sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
-					  NUM_VS_GPRS(num_vs_gprs) |
-					  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
-		sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
-					  NUM_ES_GPRS(num_es_gprs));
-		sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
-					  NUM_LS_GPRS(num_ls_gprs));
-		sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
-					   NUM_VS_THREADS(num_vs_threads) |
-					   NUM_GS_THREADS(num_gs_threads) |
-					   NUM_ES_THREADS(num_es_threads));
-		sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
-					     NUM_LS_THREADS(num_ls_threads));
-		sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
-					    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
-		sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
-					    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
-		sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
-					    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
-
-		/* disable dyn gprs */
-		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(ring, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(ring, 0);
-
-		/* setup LDS */
-		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-		radeon_ring_write(ring, (SQ_LDS_RESOURCE_MGMT - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(ring, 0x10001000);
-
-		/* SQ config */
-		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 11));
-		radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
-		radeon_ring_write(ring, sq_config);
-		radeon_ring_write(ring, sq_gpr_resource_mgmt_1);
-		radeon_ring_write(ring, sq_gpr_resource_mgmt_2);
-		radeon_ring_write(ring, sq_gpr_resource_mgmt_3);
-		radeon_ring_write(ring, 0);
-		radeon_ring_write(ring, 0);
-		radeon_ring_write(ring, sq_thread_resource_mgmt);
-		radeon_ring_write(ring, sq_thread_resource_mgmt_2);
-		radeon_ring_write(ring, sq_stack_resource_mgmt_1);
-		radeon_ring_write(ring, sq_stack_resource_mgmt_2);
-		radeon_ring_write(ring, sq_stack_resource_mgmt_3);
-	}
-
-	/* CONTEXT_CONTROL */
-	radeon_ring_write(ring, 0xc0012800);
-	radeon_ring_write(ring, 0x80000000);
-	radeon_ring_write(ring, 0x80000000);
-
-	/* SQ_VTX_BASE_VTX_LOC */
-	radeon_ring_write(ring, 0xc0026f00);
-	radeon_ring_write(ring, 0x00000000);
-	radeon_ring_write(ring, 0x00000000);
-	radeon_ring_write(ring, 0x00000000);
-
-	/* SET_SAMPLER */
-	radeon_ring_write(ring, 0xc0036e00);
-	radeon_ring_write(ring, 0x00000000);
-	radeon_ring_write(ring, 0x00000012);
-	radeon_ring_write(ring, 0x00000000);
-	radeon_ring_write(ring, 0x00000000);
-
-	/* set to DX10/11 mode */
-	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
-	radeon_ring_write(ring, 1);
-
-	/* emit an IB pointing at default state */
-	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
-	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
-	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-	radeon_ring_write(ring, gpu_addr & 0xFFFFFFFC);
-	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF);
-	radeon_ring_write(ring, dwords);
-
-}
-
-int evergreen_blit_init(struct radeon_device *rdev)
-{
-	u32 obj_size;
-	int i, r, dwords;
-	void *ptr;
-	u32 packet2s[16];
-	int num_packet2s = 0;
-
-	rdev->r600_blit.primitives.set_render_target = set_render_target;
-	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
-	rdev->r600_blit.primitives.set_shaders = set_shaders;
-	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
-	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
-	rdev->r600_blit.primitives.set_scissors = set_scissors;
-	rdev->r600_blit.primitives.draw_auto = draw_auto;
-	rdev->r600_blit.primitives.set_default_state = set_default_state;
-
-	rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
-	rdev->r600_blit.ring_size_common += 55; /* shaders + def state */
-	rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */
-	rdev->r600_blit.ring_size_common += 5; /* done copy */
-	rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
-
-	rdev->r600_blit.ring_size_per_loop = 74;
-	if (rdev->family >= CHIP_CAYMAN)
-		rdev->r600_blit.ring_size_per_loop += 9; /* additional DWs for surface sync */
-
-	rdev->r600_blit.max_dim = 16384;
-
-	rdev->r600_blit.state_offset = 0;
-
-	if (rdev->family < CHIP_CAYMAN)
-		rdev->r600_blit.state_len = evergreen_default_size;
-	else
-		rdev->r600_blit.state_len = cayman_default_size;
-
-	dwords = rdev->r600_blit.state_len;
-	while (dwords & 0xf) {
-		packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
-		dwords++;
-	}
-
-	obj_size = dwords * 4;
-	obj_size = ALIGN(obj_size, 256);
-
-	rdev->r600_blit.vs_offset = obj_size;
-	if (rdev->family < CHIP_CAYMAN)
-		obj_size += evergreen_vs_size * 4;
-	else
-		obj_size += cayman_vs_size * 4;
-	obj_size = ALIGN(obj_size, 256);
-
-	rdev->r600_blit.ps_offset = obj_size;
-	if (rdev->family < CHIP_CAYMAN)
-		obj_size += evergreen_ps_size * 4;
-	else
-		obj_size += cayman_ps_size * 4;
-	obj_size = ALIGN(obj_size, 256);
-
-	/* pin copy shader into vram if not already initialized */
-	if (!rdev->r600_blit.shader_obj) {
-		r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_VRAM,
-				     NULL, &rdev->r600_blit.shader_obj);
-		if (r) {
-			DRM_ERROR("evergreen failed to allocate shader\n");
-			return r;
-		}
-
-		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-		if (unlikely(r != 0))
-			return r;
-		r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
-				  &rdev->r600_blit.shader_gpu_addr);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-		if (r) {
-			dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
-			return r;
-		}
-	}
-
-	DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n",
-		  obj_size,
-		  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);
-
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (unlikely(r != 0))
-		return r;
-	r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
-	if (r) {
-		DRM_ERROR("failed to map blit object %d\n", r);
-		return r;
-	}
-
-	if (rdev->family < CHIP_CAYMAN) {
-		memcpy_toio(ptr + rdev->r600_blit.state_offset,
-			    evergreen_default_state, rdev->r600_blit.state_len * 4);
-
-		if (num_packet2s)
-			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
-				    packet2s, num_packet2s * 4);
-		for (i = 0; i < evergreen_vs_size; i++)
-			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
-		for (i = 0; i < evergreen_ps_size; i++)
-			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
-	} else {
-		memcpy_toio(ptr + rdev->r600_blit.state_offset,
-			    cayman_default_state, rdev->r600_blit.state_len * 4);
-
-		if (num_packet2s)
-			memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
-				    packet2s, num_packet2s * 4);
-		for (i = 0; i < cayman_vs_size; i++)
-			*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
-		for (i = 0; i < cayman_ps_size; i++)
-			*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
-	}
-	radeon_bo_kunmap(rdev->r600_blit.shader_obj);
-	radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
-	return 0;
-}
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
index f85c0af..d433834 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_shaders.c
@@ -300,58 +300,4 @@
 	0x00000010, /*  */
 };
 
-const u32 evergreen_vs[] =
-{
-	0x00000004,
-	0x80800400,
-	0x0000a03c,
-	0x95000688,
-	0x00004000,
-	0x15200688,
-	0x00000000,
-	0x00000000,
-	0x3c000000,
-	0x67961001,
-#ifdef __BIG_ENDIAN
-	0x000a0000,
-#else
-	0x00080000,
-#endif
-	0x00000000,
-	0x1c000000,
-	0x67961000,
-#ifdef __BIG_ENDIAN
-	0x00020008,
-#else
-	0x00000008,
-#endif
-	0x00000000,
-};
-
-const u32 evergreen_ps[] =
-{
-	0x00000003,
-	0xa00c0000,
-	0x00000008,
-	0x80400000,
-	0x00000000,
-	0x95200688,
-	0x00380400,
-	0x00146b10,
-	0x00380000,
-	0x20146b10,
-	0x00380400,
-	0x40146b00,
-	0x80380000,
-	0x60146b00,
-	0x00000000,
-	0x00000000,
-	0x00000010,
-	0x000d1000,
-	0xb0800000,
-	0x00000000,
-};
-
-const u32 evergreen_ps_size = ARRAY_SIZE(evergreen_ps);
-const u32 evergreen_vs_size = ARRAY_SIZE(evergreen_vs);
 const u32 evergreen_default_size = ARRAY_SIZE(evergreen_default_state);
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
new file mode 100644
index 0000000..6a0656d
--- /dev/null
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "evergreend.h"
+
+u32 evergreen_gpu_check_soft_reset(struct radeon_device *rdev);
+
+/**
+ * evergreen_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (evergreen-SI).
+ */
+void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
+				   struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+	/* write the fence */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+	radeon_ring_write(ring, fence->seq);
+	/* generate an interrupt */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0));
+	/* flush HDP */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+	radeon_ring_write(ring, 1);
+}
+
+/**
+ * evergreen_dma_ring_ib_execute - schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (evergreen).
+ */
+void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
+				   struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * evergreen_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (evergreen-cayman).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int evergreen_copy_dma(struct radeon_device *rdev,
+		       uint64_t src_offset, uint64_t dst_offset,
+		       unsigned num_gpu_pages,
+		       struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_dw, cur_size_in_dw;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xfffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_dw = size_in_dw;
+		if (cur_size_in_dw > 0xFFFFF)
+			cur_size_in_dw = 0xFFFFF;
+		size_in_dw -= cur_size_in_dw;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, cur_size_in_dw));
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, src_offset & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_dw * 4;
+		dst_offset += cur_size_in_dw * 4;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
+/**
+ * evergreen_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up.
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 reset_mask = evergreen_gpu_check_soft_reset(rdev);
+
+	if (!(reset_mask & RADEON_RESET_DMA)) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
+
diff --git a/drivers/gpu/drm/radeon/evergreen_hdmi.c b/drivers/gpu/drm/radeon/evergreen_hdmi.c
index b0d3fb3..f71ce39 100644
--- a/drivers/gpu/drm/radeon/evergreen_hdmi.c
+++ b/drivers/gpu/drm/radeon/evergreen_hdmi.c
@@ -32,6 +32,10 @@
 #include "evergreend.h"
 #include "atom.h"
 
+extern void dce6_afmt_write_speaker_allocation(struct drm_encoder *encoder);
+extern void dce6_afmt_write_sad_regs(struct drm_encoder *encoder);
+extern void dce6_afmt_select_pin(struct drm_encoder *encoder);
+
 /*
  * update the N and CTS parameters for a given pixel clock rate
  */
@@ -54,6 +58,45 @@
 	WREG32(HDMI_ACR_48_1 + offset, acr.n_48khz);
 }
 
+static void dce4_afmt_write_speaker_allocation(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector = NULL;
+	u32 tmp;
+	u8 *sadb;
+	int sad_count;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder)
+			radeon_connector = to_radeon_connector(connector);
+	}
+
+	if (!radeon_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		return;
+	}
+
+	/* program the speaker allocation */
+	tmp = RREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER);
+	tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK);
+	/* set HDMI mode */
+	tmp |= HDMI_CONNECTION;
+	if (sad_count)
+		tmp |= SPEAKER_ALLOCATION(sadb[0]);
+	else
+		tmp |= SPEAKER_ALLOCATION(5); /* stereo */
+	WREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER, tmp);
+
+	kfree(sadb);
+}
+
 static void evergreen_hdmi_write_sad_regs(struct drm_encoder *encoder)
 {
 	struct radeon_device *rdev = encoder->dev->dev_private;
@@ -148,18 +191,44 @@
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
 	u32 base_rate = 24000;
+	u32 max_ratio = clock / base_rate;
+	u32 dto_phase;
+	u32 dto_modulo = clock;
+	u32 wallclock_ratio;
+	u32 dto_cntl;
 
 	if (!dig || !dig->afmt)
 		return;
 
+	if (ASIC_IS_DCE6(rdev)) {
+		dto_phase = 24 * 1000;
+	} else {
+		if (max_ratio >= 8) {
+			dto_phase = 192 * 1000;
+			wallclock_ratio = 3;
+		} else if (max_ratio >= 4) {
+			dto_phase = 96 * 1000;
+			wallclock_ratio = 2;
+		} else if (max_ratio >= 2) {
+			dto_phase = 48 * 1000;
+			wallclock_ratio = 1;
+		} else {
+			dto_phase = 24 * 1000;
+			wallclock_ratio = 0;
+		}
+		dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK;
+		dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio);
+		WREG32(DCCG_AUDIO_DTO0_CNTL, dto_cntl);
+	}
+
 	/* XXX two dtos; generally use dto0 for hdmi */
 	/* Express [24MHz / target pixel clock] as an exact rational
 	 * number (coefficient of two integer numbers.  DCCG_AUDIO_DTOx_PHASE
 	 * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator
 	 */
-	WREG32(DCCG_AUDIO_DTO0_PHASE, base_rate * 100);
-	WREG32(DCCG_AUDIO_DTO0_MODULE, clock * 100);
 	WREG32(DCCG_AUDIO_DTO_SOURCE, DCCG_AUDIO_DTO0_SOURCE_SEL(radeon_crtc->crtc_id));
+	WREG32(DCCG_AUDIO_DTO0_PHASE, dto_phase);
+	WREG32(DCCG_AUDIO_DTO0_MODULE, dto_modulo);
 }
 
 
@@ -238,13 +307,23 @@
 	       AFMT_60958_CS_CHANNEL_NUMBER_6(7) |
 	       AFMT_60958_CS_CHANNEL_NUMBER_7(8));
 
-	/* fglrx sets 0x0001005f | (x & 0x00fc0000) in 0x5f78 here */
+	if (ASIC_IS_DCE6(rdev)) {
+		dce6_afmt_write_speaker_allocation(encoder);
+	} else {
+		dce4_afmt_write_speaker_allocation(encoder);
+	}
 
 	WREG32(AFMT_AUDIO_PACKET_CONTROL2 + offset,
 	       AFMT_AUDIO_CHANNEL_ENABLE(0xff));
 
 	/* fglrx sets 0x40 in 0x5f80 here */
-	evergreen_hdmi_write_sad_regs(encoder);
+
+	if (ASIC_IS_DCE6(rdev)) {
+		dce6_afmt_select_pin(encoder);
+		dce6_afmt_write_sad_regs(encoder);
+	} else {
+		evergreen_hdmi_write_sad_regs(encoder);
+	}
 
 	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
 	if (err < 0) {
@@ -280,6 +359,8 @@
 
 void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable)
 {
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 
@@ -292,6 +373,15 @@
 	if (!enable && !dig->afmt->enabled)
 		return;
 
+	if (enable) {
+		if (ASIC_IS_DCE6(rdev))
+			dig->afmt->pin = dce6_audio_get_pin(rdev);
+		else
+			dig->afmt->pin = r600_audio_get_pin(rdev);
+	} else {
+		dig->afmt->pin = NULL;
+	}
+
 	dig->afmt->enabled = enable;
 
 	DRM_DEBUG("%sabling HDMI interface @ 0x%04X for encoder 0x%x\n",
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index a7baf67..8768fd6 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -497,6 +497,9 @@
 #define DCCG_AUDIO_DTO0_MODULE            0x05b4
 #define DCCG_AUDIO_DTO0_LOAD              0x05b8
 #define DCCG_AUDIO_DTO0_CNTL              0x05bc
+#       define DCCG_AUDIO_DTO_WALLCLOCK_RATIO(x) (((x) & 7) << 0)
+#       define DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK 7
+#       define DCCG_AUDIO_DTO_WALLCLOCK_RATIO_SHIFT 0
 
 #define DCCG_AUDIO_DTO1_PHASE             0x05c0
 #define DCCG_AUDIO_DTO1_MODULE            0x05c4
@@ -711,6 +714,13 @@
 #define AFMT_GENERIC0_7                      0x7138
 
 /* DCE4/5 ELD audio interface */
+#define AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER          0x5f78
+#define		SPEAKER_ALLOCATION(x)			(((x) & 0x7f) << 0)
+#define		SPEAKER_ALLOCATION_MASK			(0x7f << 0)
+#define		SPEAKER_ALLOCATION_SHIFT		0
+#define		HDMI_CONNECTION				(1 << 16)
+#define		DP_CONNECTION				(1 << 17)
+
 #define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0        0x5f84 /* LPCM */
 #define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1        0x5f88 /* AC3 */
 #define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2        0x5f8c /* MPEG1 */
@@ -1150,6 +1160,10 @@
 #       define LATENCY_LOW_WATERMARK(x)                   ((x) << 0)
 #       define LATENCY_HIGH_WATERMARK(x)                  ((x) << 16)
 
+#define	PIPE0_DMIF_BUFFER_CONTROL			  0x0ca0
+#       define DMIF_BUFFERS_ALLOCATED(x)                  ((x) << 0)
+#       define DMIF_BUFFERS_ALLOCATED_COMPLETED           (1 << 4)
+
 #define IH_RB_CNTL                                        0x3e00
 #       define IH_RB_ENABLE                               (1 << 0)
 #       define IH_IB_SIZE(x)                              ((x) << 1) /* log2 */
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c
new file mode 100644
index 0000000..ecd6080
--- /dev/null
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@@ -0,0 +1,2645 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "radeon.h"
+#include "cikd.h"
+#include "r600_dpm.h"
+#include "kv_dpm.h"
+#include "radeon_asic.h"
+#include <linux/seq_file.h>
+
+#define KV_MAX_DEEPSLEEP_DIVIDER_ID     5
+#define KV_MINIMUM_ENGINE_CLOCK         800
+#define SMC_RAM_END                     0x40000
+
+static void kv_init_graphics_levels(struct radeon_device *rdev);
+static int kv_calculate_ds_divider(struct radeon_device *rdev);
+static int kv_calculate_nbps_level_settings(struct radeon_device *rdev);
+static int kv_calculate_dpm_settings(struct radeon_device *rdev);
+static void kv_enable_new_levels(struct radeon_device *rdev);
+static void kv_program_nbps_index_settings(struct radeon_device *rdev,
+					   struct radeon_ps *new_rps);
+static int kv_set_enabled_levels(struct radeon_device *rdev);
+static int kv_force_dpm_highest(struct radeon_device *rdev);
+static int kv_force_dpm_lowest(struct radeon_device *rdev);
+static void kv_apply_state_adjust_rules(struct radeon_device *rdev,
+					struct radeon_ps *new_rps,
+					struct radeon_ps *old_rps);
+static int kv_set_thermal_temperature_range(struct radeon_device *rdev,
+					    int min_temp, int max_temp);
+static int kv_init_fps_limits(struct radeon_device *rdev);
+
+void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate);
+static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate);
+static void kv_dpm_powergate_samu(struct radeon_device *rdev, bool gate);
+static void kv_dpm_powergate_acp(struct radeon_device *rdev, bool gate);
+
+extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev);
+extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev);
+extern void cik_update_cg(struct radeon_device *rdev,
+			  u32 block, bool enable);
+
+static const struct kv_lcac_config_values sx_local_cac_cfg_kv[] =
+{
+	{  0,       4,        1    },
+	{  1,       4,        1    },
+	{  2,       5,        1    },
+	{  3,       4,        2    },
+	{  4,       1,        1    },
+	{  5,       5,        2    },
+	{  6,       6,        1    },
+	{  7,       9,        2    },
+	{ 0xffffffff }
+};
+
+static const struct kv_lcac_config_values mc0_local_cac_cfg_kv[] =
+{
+	{  0,       4,        1    },
+	{ 0xffffffff }
+};
+
+static const struct kv_lcac_config_values mc1_local_cac_cfg_kv[] =
+{
+	{  0,       4,        1    },
+	{ 0xffffffff }
+};
+
+static const struct kv_lcac_config_values mc2_local_cac_cfg_kv[] =
+{
+	{  0,       4,        1    },
+	{ 0xffffffff }
+};
+
+static const struct kv_lcac_config_values mc3_local_cac_cfg_kv[] =
+{
+	{  0,       4,        1    },
+	{ 0xffffffff }
+};
+
+static const struct kv_lcac_config_values cpl_local_cac_cfg_kv[] =
+{
+	{  0,       4,        1    },
+	{  1,       4,        1    },
+	{  2,       5,        1    },
+	{  3,       4,        1    },
+	{  4,       1,        1    },
+	{  5,       5,        1    },
+	{  6,       6,        1    },
+	{  7,       9,        1    },
+	{  8,       4,        1    },
+	{  9,       2,        1    },
+	{  10,      3,        1    },
+	{  11,      6,        1    },
+	{  12,      8,        2    },
+	{  13,      1,        1    },
+	{  14,      2,        1    },
+	{  15,      3,        1    },
+	{  16,      1,        1    },
+	{  17,      4,        1    },
+	{  18,      3,        1    },
+	{  19,      1,        1    },
+	{  20,      8,        1    },
+	{  21,      5,        1    },
+	{  22,      1,        1    },
+	{  23,      1,        1    },
+	{  24,      4,        1    },
+	{  27,      6,        1    },
+	{  28,      1,        1    },
+	{ 0xffffffff }
+};
+
+static const struct kv_lcac_config_reg sx0_cac_config_reg[] =
+{
+	{ 0xc0400d00, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 }
+};
+
+static const struct kv_lcac_config_reg mc0_cac_config_reg[] =
+{
+	{ 0xc0400d30, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 }
+};
+
+static const struct kv_lcac_config_reg mc1_cac_config_reg[] =
+{
+	{ 0xc0400d3c, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 }
+};
+
+static const struct kv_lcac_config_reg mc2_cac_config_reg[] =
+{
+	{ 0xc0400d48, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 }
+};
+
+static const struct kv_lcac_config_reg mc3_cac_config_reg[] =
+{
+	{ 0xc0400d54, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 }
+};
+
+static const struct kv_lcac_config_reg cpl_cac_config_reg[] =
+{
+	{ 0xc0400d80, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 }
+};
+
+static const struct kv_pt_config_reg didt_config_kv[] =
+{
+	{ 0x10, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x10, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x10, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x10, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x11, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x11, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x11, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x11, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x12, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x12, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x12, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x12, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x2, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND },
+	{ 0x2, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND },
+	{ 0x2, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND },
+	{ 0x1, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND },
+	{ 0x1, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND },
+	{ 0x0, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x30, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x30, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x30, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x30, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x31, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x31, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x31, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x31, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x32, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x32, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x32, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x32, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x22, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND },
+	{ 0x22, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND },
+	{ 0x22, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND },
+	{ 0x21, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND },
+	{ 0x21, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND },
+	{ 0x20, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x50, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x50, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x50, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x50, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x51, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x51, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x51, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x51, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x52, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x52, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x52, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x52, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x42, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND },
+	{ 0x42, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND },
+	{ 0x42, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND },
+	{ 0x41, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND },
+	{ 0x41, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND },
+	{ 0x40, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x70, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x70, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x70, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x70, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x71, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x71, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x71, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x71, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x72, 0x000000ff, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x72, 0x0000ff00, 8, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x72, 0x00ff0000, 16, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x72, 0xff000000, 24, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0x62, 0x00003fff, 0, 0x4, KV_CONFIGREG_DIDT_IND },
+	{ 0x62, 0x03ff0000, 16, 0x80, KV_CONFIGREG_DIDT_IND },
+	{ 0x62, 0x78000000, 27, 0x3, KV_CONFIGREG_DIDT_IND },
+	{ 0x61, 0x0000ffff, 0, 0x3FFF, KV_CONFIGREG_DIDT_IND },
+	{ 0x61, 0xffff0000, 16, 0x3FFF, KV_CONFIGREG_DIDT_IND },
+	{ 0x60, 0x00000001, 0, 0x0, KV_CONFIGREG_DIDT_IND },
+	{ 0xFFFFFFFF }
+};
+
+static struct kv_ps *kv_get_ps(struct radeon_ps *rps)
+{
+	struct kv_ps *ps = rps->ps_priv;
+
+	return ps;
+}
+
+static struct kv_power_info *kv_get_pi(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = rdev->pm.dpm.priv;
+
+	return pi;
+}
+
+#if 0
+static void kv_program_local_cac_table(struct radeon_device *rdev,
+				       const struct kv_lcac_config_values *local_cac_table,
+				       const struct kv_lcac_config_reg *local_cac_reg)
+{
+	u32 i, count, data;
+	const struct kv_lcac_config_values *values = local_cac_table;
+
+	while (values->block_id != 0xffffffff) {
+		count = values->signal_id;
+		for (i = 0; i < count; i++) {
+			data = ((values->block_id << local_cac_reg->block_shift) &
+				local_cac_reg->block_mask);
+			data |= ((i << local_cac_reg->signal_shift) &
+				 local_cac_reg->signal_mask);
+			data |= ((values->t << local_cac_reg->t_shift) &
+				 local_cac_reg->t_mask);
+			data |= ((1 << local_cac_reg->enable_shift) &
+				 local_cac_reg->enable_mask);
+			WREG32_SMC(local_cac_reg->cntl, data);
+		}
+		values++;
+	}
+}
+#endif
+
+static int kv_program_pt_config_registers(struct radeon_device *rdev,
+					  const struct kv_pt_config_reg *cac_config_regs)
+{
+	const struct kv_pt_config_reg *config_regs = cac_config_regs;
+	u32 data;
+	u32 cache = 0;
+
+	if (config_regs == NULL)
+		return -EINVAL;
+
+	while (config_regs->offset != 0xFFFFFFFF) {
+		if (config_regs->type == KV_CONFIGREG_CACHE) {
+			cache |= ((config_regs->value << config_regs->shift) & config_regs->mask);
+		} else {
+			switch (config_regs->type) {
+			case KV_CONFIGREG_SMC_IND:
+				data = RREG32_SMC(config_regs->offset);
+				break;
+			case KV_CONFIGREG_DIDT_IND:
+				data = RREG32_DIDT(config_regs->offset);
+				break;
+			default:
+				data = RREG32(config_regs->offset << 2);
+				break;
+			}
+
+			data &= ~config_regs->mask;
+			data |= ((config_regs->value << config_regs->shift) & config_regs->mask);
+			data |= cache;
+			cache = 0;
+
+			switch (config_regs->type) {
+			case KV_CONFIGREG_SMC_IND:
+				WREG32_SMC(config_regs->offset, data);
+				break;
+			case KV_CONFIGREG_DIDT_IND:
+				WREG32_DIDT(config_regs->offset, data);
+				break;
+			default:
+				WREG32(config_regs->offset << 2, data);
+				break;
+			}
+		}
+		config_regs++;
+	}
+
+	return 0;
+}
+
+static void kv_do_enable_didt(struct radeon_device *rdev, bool enable)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 data;
+
+	if (pi->caps_sq_ramping) {
+		data = RREG32_DIDT(DIDT_SQ_CTRL0);
+		if (enable)
+			data |= DIDT_CTRL_EN;
+		else
+			data &= ~DIDT_CTRL_EN;
+		WREG32_DIDT(DIDT_SQ_CTRL0, data);
+	}
+
+	if (pi->caps_db_ramping) {
+		data = RREG32_DIDT(DIDT_DB_CTRL0);
+		if (enable)
+			data |= DIDT_CTRL_EN;
+		else
+			data &= ~DIDT_CTRL_EN;
+		WREG32_DIDT(DIDT_DB_CTRL0, data);
+	}
+
+	if (pi->caps_td_ramping) {
+		data = RREG32_DIDT(DIDT_TD_CTRL0);
+		if (enable)
+			data |= DIDT_CTRL_EN;
+		else
+			data &= ~DIDT_CTRL_EN;
+		WREG32_DIDT(DIDT_TD_CTRL0, data);
+	}
+
+	if (pi->caps_tcp_ramping) {
+		data = RREG32_DIDT(DIDT_TCP_CTRL0);
+		if (enable)
+			data |= DIDT_CTRL_EN;
+		else
+			data &= ~DIDT_CTRL_EN;
+		WREG32_DIDT(DIDT_TCP_CTRL0, data);
+	}
+}
+
+static int kv_enable_didt(struct radeon_device *rdev, bool enable)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret;
+
+	if (pi->caps_sq_ramping ||
+	    pi->caps_db_ramping ||
+	    pi->caps_td_ramping ||
+	    pi->caps_tcp_ramping) {
+		cik_enter_rlc_safe_mode(rdev);
+
+		if (enable) {
+			ret = kv_program_pt_config_registers(rdev, didt_config_kv);
+			if (ret) {
+				cik_exit_rlc_safe_mode(rdev);
+				return ret;
+			}
+		}
+
+		kv_do_enable_didt(rdev, enable);
+
+		cik_exit_rlc_safe_mode(rdev);
+	}
+
+	return 0;
+}
+
+#if 0
+static void kv_initialize_hardware_cac_manager(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	if (pi->caps_cac) {
+		WREG32_SMC(LCAC_SX0_OVR_SEL, 0);
+		WREG32_SMC(LCAC_SX0_OVR_VAL, 0);
+		kv_program_local_cac_table(rdev, sx_local_cac_cfg_kv, sx0_cac_config_reg);
+
+		WREG32_SMC(LCAC_MC0_OVR_SEL, 0);
+		WREG32_SMC(LCAC_MC0_OVR_VAL, 0);
+		kv_program_local_cac_table(rdev, mc0_local_cac_cfg_kv, mc0_cac_config_reg);
+
+		WREG32_SMC(LCAC_MC1_OVR_SEL, 0);
+		WREG32_SMC(LCAC_MC1_OVR_VAL, 0);
+		kv_program_local_cac_table(rdev, mc1_local_cac_cfg_kv, mc1_cac_config_reg);
+
+		WREG32_SMC(LCAC_MC2_OVR_SEL, 0);
+		WREG32_SMC(LCAC_MC2_OVR_VAL, 0);
+		kv_program_local_cac_table(rdev, mc2_local_cac_cfg_kv, mc2_cac_config_reg);
+
+		WREG32_SMC(LCAC_MC3_OVR_SEL, 0);
+		WREG32_SMC(LCAC_MC3_OVR_VAL, 0);
+		kv_program_local_cac_table(rdev, mc3_local_cac_cfg_kv, mc3_cac_config_reg);
+
+		WREG32_SMC(LCAC_CPL_OVR_SEL, 0);
+		WREG32_SMC(LCAC_CPL_OVR_VAL, 0);
+		kv_program_local_cac_table(rdev, cpl_local_cac_cfg_kv, cpl_cac_config_reg);
+	}
+}
+#endif
+
+static int kv_enable_smc_cac(struct radeon_device *rdev, bool enable)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret = 0;
+
+	if (pi->caps_cac) {
+		if (enable) {
+			ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_EnableCac);
+			if (ret)
+				pi->cac_enabled = false;
+			else
+				pi->cac_enabled = true;
+		} else if (pi->cac_enabled) {
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_DisableCac);
+			pi->cac_enabled = false;
+		}
+	}
+
+	return ret;
+}
+
+static int kv_process_firmware_header(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 tmp;
+	int ret;
+
+	ret = kv_read_smc_sram_dword(rdev, SMU7_FIRMWARE_HEADER_LOCATION +
+				     offsetof(SMU7_Firmware_Header, DpmTable),
+				     &tmp, pi->sram_end);
+
+	if (ret == 0)
+		pi->dpm_table_start = tmp;
+
+	ret = kv_read_smc_sram_dword(rdev, SMU7_FIRMWARE_HEADER_LOCATION +
+				     offsetof(SMU7_Firmware_Header, SoftRegisters),
+				     &tmp, pi->sram_end);
+
+	if (ret == 0)
+		pi->soft_regs_start = tmp;
+
+	return ret;
+}
+
+static int kv_enable_dpm_voltage_scaling(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret;
+
+	pi->graphics_voltage_change_enable = 1;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, GraphicsVoltageChangeEnable),
+				   &pi->graphics_voltage_change_enable,
+				   sizeof(u8), pi->sram_end);
+
+	return ret;
+}
+
+static int kv_set_dpm_interval(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret;
+
+	pi->graphics_interval = 1;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, GraphicsInterval),
+				   &pi->graphics_interval,
+				   sizeof(u8), pi->sram_end);
+
+	return ret;
+}
+
+static int kv_set_dpm_boot_state(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, GraphicsBootLevel),
+				   &pi->graphics_boot_level,
+				   sizeof(u8), pi->sram_end);
+
+	return ret;
+}
+
+static void kv_program_vc(struct radeon_device *rdev)
+{
+	WREG32_SMC(CG_FTV_0, 0x3FFFC000);
+}
+
+static void kv_clear_vc(struct radeon_device *rdev)
+{
+	WREG32_SMC(CG_FTV_0, 0);
+}
+
+static int kv_set_divider_value(struct radeon_device *rdev,
+				u32 index, u32 sclk)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct atom_clock_dividers dividers;
+	int ret;
+
+	ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+					     sclk, false, &dividers);
+	if (ret)
+		return ret;
+
+	pi->graphics_level[index].SclkDid = (u8)dividers.post_div;
+	pi->graphics_level[index].SclkFrequency = cpu_to_be32(sclk);
+
+	return 0;
+}
+
+static u16 kv_convert_8bit_index_to_voltage(struct radeon_device *rdev,
+					    u16 voltage)
+{
+	return 6200 - (voltage * 25);
+}
+
+static u16 kv_convert_2bit_index_to_voltage(struct radeon_device *rdev,
+					    u32 vid_2bit)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 vid_8bit = sumo_convert_vid2_to_vid7(rdev,
+						 &pi->sys_info.vid_mapping_table,
+						 vid_2bit);
+
+	return kv_convert_8bit_index_to_voltage(rdev, (u16)vid_8bit);
+}
+
+
+static int kv_set_vid(struct radeon_device *rdev, u32 index, u32 vid)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->graphics_level[index].VoltageDownH = (u8)pi->voltage_drop_t;
+	pi->graphics_level[index].MinVddNb =
+		cpu_to_be32(kv_convert_2bit_index_to_voltage(rdev, vid));
+
+	return 0;
+}
+
+static int kv_set_at(struct radeon_device *rdev, u32 index, u32 at)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->graphics_level[index].AT = cpu_to_be16((u16)at);
+
+	return 0;
+}
+
+static void kv_dpm_power_level_enable(struct radeon_device *rdev,
+				      u32 index, bool enable)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->graphics_level[index].EnabledForActivity = enable ? 1 : 0;
+}
+
+static void kv_start_dpm(struct radeon_device *rdev)
+{
+	u32 tmp = RREG32_SMC(GENERAL_PWRMGT);
+
+	tmp |= GLOBAL_PWRMGT_EN;
+	WREG32_SMC(GENERAL_PWRMGT, tmp);
+
+	kv_smc_dpm_enable(rdev, true);
+}
+
+static void kv_stop_dpm(struct radeon_device *rdev)
+{
+	kv_smc_dpm_enable(rdev, false);
+}
+
+static void kv_start_am(struct radeon_device *rdev)
+{
+	u32 sclk_pwrmgt_cntl = RREG32_SMC(SCLK_PWRMGT_CNTL);
+
+	sclk_pwrmgt_cntl &= ~(RESET_SCLK_CNT | RESET_BUSY_CNT);
+	sclk_pwrmgt_cntl |= DYNAMIC_PM_EN;
+
+	WREG32_SMC(SCLK_PWRMGT_CNTL, sclk_pwrmgt_cntl);
+}
+
+static void kv_reset_am(struct radeon_device *rdev)
+{
+	u32 sclk_pwrmgt_cntl = RREG32_SMC(SCLK_PWRMGT_CNTL);
+
+	sclk_pwrmgt_cntl |= (RESET_SCLK_CNT | RESET_BUSY_CNT);
+
+	WREG32_SMC(SCLK_PWRMGT_CNTL, sclk_pwrmgt_cntl);
+}
+
+static int kv_freeze_sclk_dpm(struct radeon_device *rdev, bool freeze)
+{
+	return kv_notify_message_to_smu(rdev, freeze ?
+					PPSMC_MSG_SCLKDPM_FreezeLevel : PPSMC_MSG_SCLKDPM_UnfreezeLevel);
+}
+
+static int kv_force_lowest_valid(struct radeon_device *rdev)
+{
+	return kv_force_dpm_lowest(rdev);
+}
+
+static int kv_unforce_levels(struct radeon_device *rdev)
+{
+	return kv_notify_message_to_smu(rdev, PPSMC_MSG_NoForcedLevel);
+}
+
+static int kv_update_sclk_t(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 low_sclk_interrupt_t = 0;
+	int ret = 0;
+
+	if (pi->caps_sclk_throttle_low_notification) {
+		low_sclk_interrupt_t = cpu_to_be32(pi->low_sclk_interrupt_t);
+
+		ret = kv_copy_bytes_to_smc(rdev,
+					   pi->dpm_table_start +
+					   offsetof(SMU7_Fusion_DpmTable, LowSclkInterruptT),
+					   (u8 *)&low_sclk_interrupt_t,
+					   sizeof(u32), pi->sram_end);
+	}
+	return ret;
+}
+
+static int kv_program_bootup_state(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i;
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+
+	if (table && table->count) {
+		for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) {
+			if ((table->entries[i].clk == pi->boot_pl.sclk) ||
+			    (i == 0))
+				break;
+		}
+
+		pi->graphics_boot_level = (u8)i;
+		kv_dpm_power_level_enable(rdev, i, true);
+	} else {
+		struct sumo_sclk_voltage_mapping_table *table =
+			&pi->sys_info.sclk_voltage_mapping_table;
+
+		if (table->num_max_dpm_entries == 0)
+			return -EINVAL;
+
+		for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) {
+			if ((table->entries[i].sclk_frequency == pi->boot_pl.sclk) ||
+			    (i == 0))
+				break;
+		}
+
+		pi->graphics_boot_level = (u8)i;
+		kv_dpm_power_level_enable(rdev, i, true);
+	}
+	return 0;
+}
+
+static int kv_enable_auto_thermal_throttling(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret;
+
+	pi->graphics_therm_throttle_enable = 1;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, GraphicsThermThrottleEnable),
+				   &pi->graphics_therm_throttle_enable,
+				   sizeof(u8), pi->sram_end);
+
+	return ret;
+}
+
+static int kv_upload_dpm_settings(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, GraphicsLevel),
+				   (u8 *)&pi->graphics_level,
+				   sizeof(SMU7_Fusion_GraphicsLevel) * SMU7_MAX_LEVELS_GRAPHICS,
+				   pi->sram_end);
+
+	if (ret)
+		return ret;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, GraphicsDpmLevelCount),
+				   &pi->graphics_dpm_level_count,
+				   sizeof(u8), pi->sram_end);
+
+	return ret;
+}
+
+static u32 kv_get_clock_difference(u32 a, u32 b)
+{
+	return (a >= b) ? a - b : b - a;
+}
+
+static u32 kv_get_clk_bypass(struct radeon_device *rdev, u32 clk)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 value;
+
+	if (pi->caps_enable_dfs_bypass) {
+		if (kv_get_clock_difference(clk, 40000) < 200)
+			value = 3;
+		else if (kv_get_clock_difference(clk, 30000) < 200)
+			value = 2;
+		else if (kv_get_clock_difference(clk, 20000) < 200)
+			value = 7;
+		else if (kv_get_clock_difference(clk, 15000) < 200)
+			value = 6;
+		else if (kv_get_clock_difference(clk, 10000) < 200)
+			value = 8;
+		else
+			value = 0;
+	} else {
+		value = 0;
+	}
+
+	return value;
+}
+
+static int kv_populate_uvd_table(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_uvd_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table;
+	struct atom_clock_dividers dividers;
+	int ret;
+	u32 i;
+
+	if (table == NULL || table->count == 0)
+		return 0;
+
+	pi->uvd_level_count = 0;
+	for (i = 0; i < table->count; i++) {
+		if (pi->high_voltage_t &&
+		    (pi->high_voltage_t < table->entries[i].v))
+			break;
+
+		pi->uvd_level[i].VclkFrequency = cpu_to_be32(table->entries[i].vclk);
+		pi->uvd_level[i].DclkFrequency = cpu_to_be32(table->entries[i].dclk);
+		pi->uvd_level[i].MinVddNb = cpu_to_be16(table->entries[i].v);
+
+		pi->uvd_level[i].VClkBypassCntl =
+			(u8)kv_get_clk_bypass(rdev, table->entries[i].vclk);
+		pi->uvd_level[i].DClkBypassCntl =
+			(u8)kv_get_clk_bypass(rdev, table->entries[i].dclk);
+
+		ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+						     table->entries[i].vclk, false, &dividers);
+		if (ret)
+			return ret;
+		pi->uvd_level[i].VclkDivider = (u8)dividers.post_div;
+
+		ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+						     table->entries[i].dclk, false, &dividers);
+		if (ret)
+			return ret;
+		pi->uvd_level[i].DclkDivider = (u8)dividers.post_div;
+
+		pi->uvd_level_count++;
+	}
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, UvdLevelCount),
+				   (u8 *)&pi->uvd_level_count,
+				   sizeof(u8), pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->uvd_interval = 1;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, UVDInterval),
+				   &pi->uvd_interval,
+				   sizeof(u8), pi->sram_end);
+	if (ret)
+		return ret;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, UvdLevel),
+				   (u8 *)&pi->uvd_level,
+				   sizeof(SMU7_Fusion_UvdLevel) * SMU7_MAX_LEVELS_UVD,
+				   pi->sram_end);
+
+	return ret;
+
+}
+
+static int kv_populate_vce_table(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret;
+	u32 i;
+	struct radeon_vce_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
+	struct atom_clock_dividers dividers;
+
+	if (table == NULL || table->count == 0)
+		return 0;
+
+	pi->vce_level_count = 0;
+	for (i = 0; i < table->count; i++) {
+		if (pi->high_voltage_t &&
+		    pi->high_voltage_t < table->entries[i].v)
+			break;
+
+		pi->vce_level[i].Frequency = cpu_to_be32(table->entries[i].evclk);
+		pi->vce_level[i].MinVoltage = cpu_to_be16(table->entries[i].v);
+
+		pi->vce_level[i].ClkBypassCntl =
+			(u8)kv_get_clk_bypass(rdev, table->entries[i].evclk);
+
+		ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+						     table->entries[i].evclk, false, &dividers);
+		if (ret)
+			return ret;
+		pi->vce_level[i].Divider = (u8)dividers.post_div;
+
+		pi->vce_level_count++;
+	}
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, VceLevelCount),
+				   (u8 *)&pi->vce_level_count,
+				   sizeof(u8),
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->vce_interval = 1;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, VCEInterval),
+				   (u8 *)&pi->vce_interval,
+				   sizeof(u8),
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, VceLevel),
+				   (u8 *)&pi->vce_level,
+				   sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_VCE,
+				   pi->sram_end);
+
+	return ret;
+}
+
+static int kv_populate_samu_table(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table;
+	struct atom_clock_dividers dividers;
+	int ret;
+	u32 i;
+
+	if (table == NULL || table->count == 0)
+		return 0;
+
+	pi->samu_level_count = 0;
+	for (i = 0; i < table->count; i++) {
+		if (pi->high_voltage_t &&
+		    pi->high_voltage_t < table->entries[i].v)
+			break;
+
+		pi->samu_level[i].Frequency = cpu_to_be32(table->entries[i].clk);
+		pi->samu_level[i].MinVoltage = cpu_to_be16(table->entries[i].v);
+
+		pi->samu_level[i].ClkBypassCntl =
+			(u8)kv_get_clk_bypass(rdev, table->entries[i].clk);
+
+		ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+						     table->entries[i].clk, false, &dividers);
+		if (ret)
+			return ret;
+		pi->samu_level[i].Divider = (u8)dividers.post_div;
+
+		pi->samu_level_count++;
+	}
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, SamuLevelCount),
+				   (u8 *)&pi->samu_level_count,
+				   sizeof(u8),
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->samu_interval = 1;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, SAMUInterval),
+				   (u8 *)&pi->samu_interval,
+				   sizeof(u8),
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, SamuLevel),
+				   (u8 *)&pi->samu_level,
+				   sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_SAMU,
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
+
+static int kv_populate_acp_table(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table;
+	struct atom_clock_dividers dividers;
+	int ret;
+	u32 i;
+
+	if (table == NULL || table->count == 0)
+		return 0;
+
+	pi->acp_level_count = 0;
+	for (i = 0; i < table->count; i++) {
+		pi->acp_level[i].Frequency = cpu_to_be32(table->entries[i].clk);
+		pi->acp_level[i].MinVoltage = cpu_to_be16(table->entries[i].v);
+
+		ret = radeon_atom_get_clock_dividers(rdev, COMPUTE_ENGINE_PLL_PARAM,
+						     table->entries[i].clk, false, &dividers);
+		if (ret)
+			return ret;
+		pi->acp_level[i].Divider = (u8)dividers.post_div;
+
+		pi->acp_level_count++;
+	}
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, AcpLevelCount),
+				   (u8 *)&pi->acp_level_count,
+				   sizeof(u8),
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	pi->acp_interval = 1;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, ACPInterval),
+				   (u8 *)&pi->acp_interval,
+				   sizeof(u8),
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	ret = kv_copy_bytes_to_smc(rdev,
+				   pi->dpm_table_start +
+				   offsetof(SMU7_Fusion_DpmTable, AcpLevel),
+				   (u8 *)&pi->acp_level,
+				   sizeof(SMU7_Fusion_ExtClkLevel) * SMU7_MAX_LEVELS_ACP,
+				   pi->sram_end);
+	if (ret)
+		return ret;
+
+	return ret;
+}
+
+static void kv_calculate_dfs_bypass_settings(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i;
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+
+	if (table && table->count) {
+		for (i = 0; i < pi->graphics_dpm_level_count; i++) {
+			if (pi->caps_enable_dfs_bypass) {
+				if (kv_get_clock_difference(table->entries[i].clk, 40000) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 3;
+				else if (kv_get_clock_difference(table->entries[i].clk, 30000) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 2;
+				else if (kv_get_clock_difference(table->entries[i].clk, 26600) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 7;
+				else if (kv_get_clock_difference(table->entries[i].clk , 20000) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 6;
+				else if (kv_get_clock_difference(table->entries[i].clk , 10000) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 8;
+				else
+					pi->graphics_level[i].ClkBypassCntl = 0;
+			} else {
+				pi->graphics_level[i].ClkBypassCntl = 0;
+			}
+		}
+	} else {
+		struct sumo_sclk_voltage_mapping_table *table =
+			&pi->sys_info.sclk_voltage_mapping_table;
+		for (i = 0; i < pi->graphics_dpm_level_count; i++) {
+			if (pi->caps_enable_dfs_bypass) {
+				if (kv_get_clock_difference(table->entries[i].sclk_frequency, 40000) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 3;
+				else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 30000) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 2;
+				else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 26600) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 7;
+				else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 20000) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 6;
+				else if (kv_get_clock_difference(table->entries[i].sclk_frequency, 10000) < 200)
+					pi->graphics_level[i].ClkBypassCntl = 8;
+				else
+					pi->graphics_level[i].ClkBypassCntl = 0;
+			} else {
+				pi->graphics_level[i].ClkBypassCntl = 0;
+			}
+		}
+	}
+}
+
+static int kv_enable_ulv(struct radeon_device *rdev, bool enable)
+{
+	return kv_notify_message_to_smu(rdev, enable ?
+					PPSMC_MSG_EnableULV : PPSMC_MSG_DisableULV);
+}
+
+static void kv_update_current_ps(struct radeon_device *rdev,
+				 struct radeon_ps *rps)
+{
+	struct kv_ps *new_ps = kv_get_ps(rps);
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->current_rps = *rps;
+	pi->current_ps = *new_ps;
+	pi->current_rps.ps_priv = &pi->current_ps;
+}
+
+static void kv_update_requested_ps(struct radeon_device *rdev,
+				   struct radeon_ps *rps)
+{
+	struct kv_ps *new_ps = kv_get_ps(rps);
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->requested_rps = *rps;
+	pi->requested_ps = *new_ps;
+	pi->requested_rps.ps_priv = &pi->requested_ps;
+}
+
+int kv_dpm_enable(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret;
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_HDP), false);
+
+	ret = kv_process_firmware_header(rdev);
+	if (ret) {
+		DRM_ERROR("kv_process_firmware_header failed\n");
+		return ret;
+	}
+	kv_init_fps_limits(rdev);
+	kv_init_graphics_levels(rdev);
+	ret = kv_program_bootup_state(rdev);
+	if (ret) {
+		DRM_ERROR("kv_program_bootup_state failed\n");
+		return ret;
+	}
+	kv_calculate_dfs_bypass_settings(rdev);
+	ret = kv_upload_dpm_settings(rdev);
+	if (ret) {
+		DRM_ERROR("kv_upload_dpm_settings failed\n");
+		return ret;
+	}
+	ret = kv_populate_uvd_table(rdev);
+	if (ret) {
+		DRM_ERROR("kv_populate_uvd_table failed\n");
+		return ret;
+	}
+	ret = kv_populate_vce_table(rdev);
+	if (ret) {
+		DRM_ERROR("kv_populate_vce_table failed\n");
+		return ret;
+	}
+	ret = kv_populate_samu_table(rdev);
+	if (ret) {
+		DRM_ERROR("kv_populate_samu_table failed\n");
+		return ret;
+	}
+	ret = kv_populate_acp_table(rdev);
+	if (ret) {
+		DRM_ERROR("kv_populate_acp_table failed\n");
+		return ret;
+	}
+	kv_program_vc(rdev);
+#if 0
+	kv_initialize_hardware_cac_manager(rdev);
+#endif
+	kv_start_am(rdev);
+	if (pi->enable_auto_thermal_throttling) {
+		ret = kv_enable_auto_thermal_throttling(rdev);
+		if (ret) {
+			DRM_ERROR("kv_enable_auto_thermal_throttling failed\n");
+			return ret;
+		}
+	}
+	ret = kv_enable_dpm_voltage_scaling(rdev);
+	if (ret) {
+		DRM_ERROR("kv_enable_dpm_voltage_scaling failed\n");
+		return ret;
+	}
+	ret = kv_set_dpm_interval(rdev);
+	if (ret) {
+		DRM_ERROR("kv_set_dpm_interval failed\n");
+		return ret;
+	}
+	ret = kv_set_dpm_boot_state(rdev);
+	if (ret) {
+		DRM_ERROR("kv_set_dpm_boot_state failed\n");
+		return ret;
+	}
+	ret = kv_enable_ulv(rdev, true);
+	if (ret) {
+		DRM_ERROR("kv_enable_ulv failed\n");
+		return ret;
+	}
+	kv_start_dpm(rdev);
+	ret = kv_enable_didt(rdev, true);
+	if (ret) {
+		DRM_ERROR("kv_enable_didt failed\n");
+		return ret;
+	}
+	ret = kv_enable_smc_cac(rdev, true);
+	if (ret) {
+		DRM_ERROR("kv_enable_smc_cac failed\n");
+		return ret;
+	}
+
+	if (rdev->irq.installed &&
+	    r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) {
+		ret = kv_set_thermal_temperature_range(rdev, R600_TEMP_RANGE_MIN, R600_TEMP_RANGE_MAX);
+		if (ret) {
+			DRM_ERROR("kv_set_thermal_temperature_range failed\n");
+			return ret;
+		}
+		rdev->irq.dpm_thermal = true;
+		radeon_irq_set(rdev);
+	}
+
+	/* powerdown unused blocks for now */
+	kv_dpm_powergate_acp(rdev, true);
+	kv_dpm_powergate_samu(rdev, true);
+	kv_dpm_powergate_vce(rdev, true);
+	kv_dpm_powergate_uvd(rdev, true);
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_HDP), true);
+
+	kv_update_current_ps(rdev, rdev->pm.dpm.boot_ps);
+
+	return ret;
+}
+
+void kv_dpm_disable(struct radeon_device *rdev)
+{
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_HDP), false);
+
+	/* powerup blocks */
+	kv_dpm_powergate_acp(rdev, false);
+	kv_dpm_powergate_samu(rdev, false);
+	kv_dpm_powergate_vce(rdev, false);
+	kv_dpm_powergate_uvd(rdev, false);
+
+	kv_enable_smc_cac(rdev, false);
+	kv_enable_didt(rdev, false);
+	kv_clear_vc(rdev);
+	kv_stop_dpm(rdev);
+	kv_enable_ulv(rdev, false);
+	kv_reset_am(rdev);
+
+	kv_update_current_ps(rdev, rdev->pm.dpm.boot_ps);
+}
+
+#if 0
+static int kv_write_smc_soft_register(struct radeon_device *rdev,
+				      u16 reg_offset, u32 value)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	return kv_copy_bytes_to_smc(rdev, pi->soft_regs_start + reg_offset,
+				    (u8 *)&value, sizeof(u16), pi->sram_end);
+}
+
+static int kv_read_smc_soft_register(struct radeon_device *rdev,
+				     u16 reg_offset, u32 *value)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	return kv_read_smc_sram_dword(rdev, pi->soft_regs_start + reg_offset,
+				      value, pi->sram_end);
+}
+#endif
+
+static void kv_init_sclk_t(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->low_sclk_interrupt_t = 0;
+}
+
+static int kv_init_fps_limits(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret = 0;
+
+	if (pi->caps_fps) {
+		u16 tmp;
+
+		tmp = 45;
+		pi->fps_high_t = cpu_to_be16(tmp);
+		ret = kv_copy_bytes_to_smc(rdev,
+					   pi->dpm_table_start +
+					   offsetof(SMU7_Fusion_DpmTable, FpsHighT),
+					   (u8 *)&pi->fps_high_t,
+					   sizeof(u16), pi->sram_end);
+
+		tmp = 30;
+		pi->fps_low_t = cpu_to_be16(tmp);
+
+		ret = kv_copy_bytes_to_smc(rdev,
+					   pi->dpm_table_start +
+					   offsetof(SMU7_Fusion_DpmTable, FpsLowT),
+					   (u8 *)&pi->fps_low_t,
+					   sizeof(u16), pi->sram_end);
+
+	}
+	return ret;
+}
+
+static void kv_init_powergate_state(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->uvd_power_gated = false;
+	pi->vce_power_gated = false;
+	pi->samu_power_gated = false;
+	pi->acp_power_gated = false;
+
+}
+
+static int kv_enable_uvd_dpm(struct radeon_device *rdev, bool enable)
+{
+	return kv_notify_message_to_smu(rdev, enable ?
+					PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable);
+}
+
+#if 0
+static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable)
+{
+	return kv_notify_message_to_smu(rdev, enable ?
+					PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable);
+}
+#endif
+
+static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable)
+{
+	return kv_notify_message_to_smu(rdev, enable ?
+					PPSMC_MSG_SAMUDPM_Enable : PPSMC_MSG_SAMUDPM_Disable);
+}
+
+static int kv_enable_acp_dpm(struct radeon_device *rdev, bool enable)
+{
+	return kv_notify_message_to_smu(rdev, enable ?
+					PPSMC_MSG_ACPDPM_Enable : PPSMC_MSG_ACPDPM_Disable);
+}
+
+static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_uvd_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table;
+	int ret;
+
+	if (!gate) {
+		if (!pi->caps_uvd_dpm || table->count || pi->caps_stable_p_state)
+			pi->uvd_boot_level = table->count - 1;
+		else
+			pi->uvd_boot_level = 0;
+
+		ret = kv_copy_bytes_to_smc(rdev,
+					   pi->dpm_table_start +
+					   offsetof(SMU7_Fusion_DpmTable, UvdBootLevel),
+					   (uint8_t *)&pi->uvd_boot_level,
+					   sizeof(u8), pi->sram_end);
+		if (ret)
+			return ret;
+
+		if (!pi->caps_uvd_dpm ||
+		    pi->caps_stable_p_state)
+			kv_send_msg_to_smc_with_parameter(rdev,
+							  PPSMC_MSG_UVDDPM_SetEnabledMask,
+							  (1 << pi->uvd_boot_level));
+	}
+
+	return kv_enable_uvd_dpm(rdev, !gate);
+}
+
+#if 0
+static u8 kv_get_vce_boot_level(struct radeon_device *rdev)
+{
+	u8 i;
+	struct radeon_vce_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
+
+	for (i = 0; i < table->count; i++) {
+		if (table->entries[i].evclk >= 0) /* XXX */
+			break;
+	}
+
+	return i;
+}
+
+static int kv_update_vce_dpm(struct radeon_device *rdev,
+			     struct radeon_ps *radeon_new_state,
+			     struct radeon_ps *radeon_current_state)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_vce_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table;
+	int ret;
+
+	if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) {
+		if (pi->caps_stable_p_state)
+			pi->vce_boot_level = table->count - 1;
+		else
+			pi->vce_boot_level = kv_get_vce_boot_level(rdev);
+
+		ret = kv_copy_bytes_to_smc(rdev,
+					   pi->dpm_table_start +
+					   offsetof(SMU7_Fusion_DpmTable, VceBootLevel),
+					   (u8 *)&pi->vce_boot_level,
+					   sizeof(u8),
+					   pi->sram_end);
+		if (ret)
+			return ret;
+
+		if (pi->caps_stable_p_state)
+			kv_send_msg_to_smc_with_parameter(rdev,
+							  PPSMC_MSG_VCEDPM_SetEnabledMask,
+							  (1 << pi->vce_boot_level));
+
+		kv_enable_vce_dpm(rdev, true);
+	} else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) {
+		kv_enable_vce_dpm(rdev, false);
+	}
+
+	return 0;
+}
+#endif
+
+static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table;
+	int ret;
+
+	if (!gate) {
+		if (pi->caps_stable_p_state)
+			pi->samu_boot_level = table->count - 1;
+		else
+			pi->samu_boot_level = 0;
+
+		ret = kv_copy_bytes_to_smc(rdev,
+					   pi->dpm_table_start +
+					   offsetof(SMU7_Fusion_DpmTable, SamuBootLevel),
+					   (u8 *)&pi->samu_boot_level,
+					   sizeof(u8),
+					   pi->sram_end);
+		if (ret)
+			return ret;
+
+		if (pi->caps_stable_p_state)
+			kv_send_msg_to_smc_with_parameter(rdev,
+							  PPSMC_MSG_SAMUDPM_SetEnabledMask,
+							  (1 << pi->samu_boot_level));
+	}
+
+	return kv_enable_samu_dpm(rdev, !gate);
+}
+
+static int kv_update_acp_dpm(struct radeon_device *rdev, bool gate)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table;
+	int ret;
+
+	if (!gate) {
+		if (pi->caps_stable_p_state)
+			pi->acp_boot_level = table->count - 1;
+		else
+			pi->acp_boot_level = 0;
+
+		ret = kv_copy_bytes_to_smc(rdev,
+					   pi->dpm_table_start +
+					   offsetof(SMU7_Fusion_DpmTable, AcpBootLevel),
+					   (u8 *)&pi->acp_boot_level,
+					   sizeof(u8),
+					   pi->sram_end);
+		if (ret)
+			return ret;
+
+		if (pi->caps_stable_p_state)
+			kv_send_msg_to_smc_with_parameter(rdev,
+							  PPSMC_MSG_ACPDPM_SetEnabledMask,
+							  (1 << pi->acp_boot_level));
+	}
+
+	return kv_enable_acp_dpm(rdev, !gate);
+}
+
+void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	if (pi->uvd_power_gated == gate)
+		return;
+
+	pi->uvd_power_gated = gate;
+
+	if (gate) {
+		if (pi->caps_uvd_pg) {
+			uvd_v1_0_stop(rdev);
+			cik_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
+		}
+		kv_update_uvd_dpm(rdev, gate);
+		if (pi->caps_uvd_pg)
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerOFF);
+	} else {
+		if (pi->caps_uvd_pg) {
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_UVDPowerON);
+			uvd_v4_2_resume(rdev);
+			uvd_v1_0_start(rdev);
+			cik_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
+		}
+		kv_update_uvd_dpm(rdev, gate);
+	}
+}
+
+static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	if (pi->vce_power_gated == gate)
+		return;
+
+	pi->vce_power_gated = gate;
+
+	if (gate) {
+		if (pi->caps_vce_pg)
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF);
+	} else {
+		if (pi->caps_vce_pg)
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON);
+	}
+}
+
+static void kv_dpm_powergate_samu(struct radeon_device *rdev, bool gate)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	if (pi->samu_power_gated == gate)
+		return;
+
+	pi->samu_power_gated = gate;
+
+	if (gate) {
+		kv_update_samu_dpm(rdev, true);
+		if (pi->caps_samu_pg)
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_SAMPowerOFF);
+	} else {
+		if (pi->caps_samu_pg)
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_SAMPowerON);
+		kv_update_samu_dpm(rdev, false);
+	}
+}
+
+static void kv_dpm_powergate_acp(struct radeon_device *rdev, bool gate)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	if (pi->acp_power_gated == gate)
+		return;
+
+	if (rdev->family == CHIP_KABINI)
+		return;
+
+	pi->acp_power_gated = gate;
+
+	if (gate) {
+		kv_update_acp_dpm(rdev, true);
+		if (pi->caps_acp_pg)
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_ACPPowerOFF);
+	} else {
+		if (pi->caps_acp_pg)
+			kv_notify_message_to_smu(rdev, PPSMC_MSG_ACPPowerON);
+		kv_update_acp_dpm(rdev, false);
+	}
+}
+
+static void kv_set_valid_clock_range(struct radeon_device *rdev,
+				     struct radeon_ps *new_rps)
+{
+	struct kv_ps *new_ps = kv_get_ps(new_rps);
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i;
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+
+	if (table && table->count) {
+		for (i = 0; i < pi->graphics_dpm_level_count; i++) {
+			if ((table->entries[i].clk >= new_ps->levels[0].sclk) ||
+			    (i == (pi->graphics_dpm_level_count - 1))) {
+				pi->lowest_valid = i;
+				break;
+			}
+		}
+
+		for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) {
+			if ((table->entries[i].clk <= new_ps->levels[new_ps->num_levels -1].sclk) ||
+			    (i == 0)) {
+				pi->highest_valid = i;
+				break;
+			}
+		}
+
+		if (pi->lowest_valid > pi->highest_valid) {
+			if ((new_ps->levels[0].sclk - table->entries[pi->highest_valid].clk) >
+			    (table->entries[pi->lowest_valid].clk - new_ps->levels[new_ps->num_levels - 1].sclk))
+				pi->highest_valid = pi->lowest_valid;
+			else
+				pi->lowest_valid =  pi->highest_valid;
+		}
+	} else {
+		struct sumo_sclk_voltage_mapping_table *table =
+			&pi->sys_info.sclk_voltage_mapping_table;
+
+		for (i = 0; i < (int)pi->graphics_dpm_level_count; i++) {
+			if (table->entries[i].sclk_frequency >= new_ps->levels[0].sclk ||
+			    i == (int)(pi->graphics_dpm_level_count - 1)) {
+				pi->lowest_valid = i;
+				break;
+			}
+		}
+
+		for (i = pi->graphics_dpm_level_count - 1; i >= 0; i--) {
+			if (table->entries[i].sclk_frequency <=
+			    new_ps->levels[new_ps->num_levels - 1].sclk ||
+			    i == 0) {
+				pi->highest_valid = i;
+				break;
+			}
+		}
+
+		if (pi->lowest_valid > pi->highest_valid) {
+			if ((new_ps->levels[0].sclk -
+			     table->entries[pi->highest_valid].sclk_frequency) >
+			    (table->entries[pi->lowest_valid].sclk_frequency -
+			     new_ps->levels[new_ps->num_levels -1].sclk))
+				pi->highest_valid = pi->lowest_valid;
+			else
+				pi->lowest_valid =  pi->highest_valid;
+		}
+	}
+}
+
+static int kv_update_dfs_bypass_settings(struct radeon_device *rdev,
+					 struct radeon_ps *new_rps)
+{
+	struct kv_ps *new_ps = kv_get_ps(new_rps);
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret = 0;
+	u8 clk_bypass_cntl;
+
+	if (pi->caps_enable_dfs_bypass) {
+		clk_bypass_cntl = new_ps->need_dfs_bypass ?
+			pi->graphics_level[pi->graphics_boot_level].ClkBypassCntl : 0;
+		ret = kv_copy_bytes_to_smc(rdev,
+					   (pi->dpm_table_start +
+					    offsetof(SMU7_Fusion_DpmTable, GraphicsLevel) +
+					    (pi->graphics_boot_level * sizeof(SMU7_Fusion_GraphicsLevel)) +
+					    offsetof(SMU7_Fusion_GraphicsLevel, ClkBypassCntl)),
+					   &clk_bypass_cntl,
+					   sizeof(u8), pi->sram_end);
+	}
+
+	return ret;
+}
+
+static int kv_enable_nb_dpm(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	int ret = 0;
+
+	if (pi->enable_nb_dpm && !pi->nb_dpm_enabled) {
+		ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_NBDPM_Enable);
+		if (ret == 0)
+			pi->nb_dpm_enabled = true;
+	}
+
+	return ret;
+}
+
+int kv_dpm_force_performance_level(struct radeon_device *rdev,
+				   enum radeon_dpm_forced_level level)
+{
+	int ret;
+
+	if (level == RADEON_DPM_FORCED_LEVEL_HIGH) {
+		ret = kv_force_dpm_highest(rdev);
+		if (ret)
+			return ret;
+	} else if (level == RADEON_DPM_FORCED_LEVEL_LOW) {
+		ret = kv_force_dpm_lowest(rdev);
+		if (ret)
+			return ret;
+	} else if (level == RADEON_DPM_FORCED_LEVEL_AUTO) {
+		ret = kv_unforce_levels(rdev);
+		if (ret)
+			return ret;
+	}
+
+	rdev->pm.dpm.forced_level = level;
+
+	return 0;
+}
+
+int kv_dpm_pre_set_power_state(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_ps requested_ps = *rdev->pm.dpm.requested_ps;
+	struct radeon_ps *new_ps = &requested_ps;
+
+	kv_update_requested_ps(rdev, new_ps);
+
+	kv_apply_state_adjust_rules(rdev,
+				    &pi->requested_rps,
+				    &pi->current_rps);
+
+	return 0;
+}
+
+int kv_dpm_set_power_state(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_ps *new_ps = &pi->requested_rps;
+	/*struct radeon_ps *old_ps = &pi->current_rps;*/
+	int ret;
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_HDP), false);
+
+	if (rdev->family == CHIP_KABINI) {
+		if (pi->enable_dpm) {
+			kv_set_valid_clock_range(rdev, new_ps);
+			kv_update_dfs_bypass_settings(rdev, new_ps);
+			ret = kv_calculate_ds_divider(rdev);
+			if (ret) {
+				DRM_ERROR("kv_calculate_ds_divider failed\n");
+				return ret;
+			}
+			kv_calculate_nbps_level_settings(rdev);
+			kv_calculate_dpm_settings(rdev);
+			kv_force_lowest_valid(rdev);
+			kv_enable_new_levels(rdev);
+			kv_upload_dpm_settings(rdev);
+			kv_program_nbps_index_settings(rdev, new_ps);
+			kv_unforce_levels(rdev);
+			kv_set_enabled_levels(rdev);
+			kv_force_lowest_valid(rdev);
+			kv_unforce_levels(rdev);
+#if 0
+			ret = kv_update_vce_dpm(rdev, new_ps, old_ps);
+			if (ret) {
+				DRM_ERROR("kv_update_vce_dpm failed\n");
+				return ret;
+			}
+#endif
+			kv_update_sclk_t(rdev);
+		}
+	} else {
+		if (pi->enable_dpm) {
+			kv_set_valid_clock_range(rdev, new_ps);
+			kv_update_dfs_bypass_settings(rdev, new_ps);
+			ret = kv_calculate_ds_divider(rdev);
+			if (ret) {
+				DRM_ERROR("kv_calculate_ds_divider failed\n");
+				return ret;
+			}
+			kv_calculate_nbps_level_settings(rdev);
+			kv_calculate_dpm_settings(rdev);
+			kv_freeze_sclk_dpm(rdev, true);
+			kv_upload_dpm_settings(rdev);
+			kv_program_nbps_index_settings(rdev, new_ps);
+			kv_freeze_sclk_dpm(rdev, false);
+			kv_set_enabled_levels(rdev);
+#if 0
+			ret = kv_update_vce_dpm(rdev, new_ps, old_ps);
+			if (ret) {
+				DRM_ERROR("kv_update_vce_dpm failed\n");
+				return ret;
+			}
+#endif
+			kv_update_sclk_t(rdev);
+			kv_enable_nb_dpm(rdev);
+		}
+	}
+
+	cik_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			     RADEON_CG_BLOCK_SDMA |
+			     RADEON_CG_BLOCK_BIF |
+			     RADEON_CG_BLOCK_HDP), true);
+
+	rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO;
+	return 0;
+}
+
+void kv_dpm_post_set_power_state(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_ps *new_ps = &pi->requested_rps;
+
+	kv_update_current_ps(rdev, new_ps);
+}
+
+void kv_dpm_setup_asic(struct radeon_device *rdev)
+{
+	sumo_take_smu_control(rdev, true);
+	kv_init_powergate_state(rdev);
+	kv_init_sclk_t(rdev);
+}
+
+void kv_dpm_reset_asic(struct radeon_device *rdev)
+{
+	kv_force_lowest_valid(rdev);
+	kv_init_graphics_levels(rdev);
+	kv_program_bootup_state(rdev);
+	kv_upload_dpm_settings(rdev);
+	kv_force_lowest_valid(rdev);
+	kv_unforce_levels(rdev);
+}
+
+//XXX use sumo_dpm_display_configuration_changed
+
+static void kv_construct_max_power_limits_table(struct radeon_device *rdev,
+						struct radeon_clock_and_voltage_limits *table)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	if (pi->sys_info.sclk_voltage_mapping_table.num_max_dpm_entries > 0) {
+		int idx = pi->sys_info.sclk_voltage_mapping_table.num_max_dpm_entries - 1;
+		table->sclk =
+			pi->sys_info.sclk_voltage_mapping_table.entries[idx].sclk_frequency;
+		table->vddc =
+			kv_convert_2bit_index_to_voltage(rdev,
+							 pi->sys_info.sclk_voltage_mapping_table.entries[idx].vid_2bit);
+	}
+
+	table->mclk = pi->sys_info.nbp_memory_clock[0];
+}
+
+static void kv_patch_voltage_values(struct radeon_device *rdev)
+{
+	int i;
+	struct radeon_uvd_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table;
+
+	if (table->count) {
+		for (i = 0; i < table->count; i++)
+			table->entries[i].v =
+				kv_convert_8bit_index_to_voltage(rdev,
+								 table->entries[i].v);
+	}
+
+}
+
+static void kv_construct_boot_state(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->boot_pl.sclk = pi->sys_info.bootup_sclk;
+	pi->boot_pl.vddc_index = pi->sys_info.bootup_nb_voltage_index;
+	pi->boot_pl.ds_divider_index = 0;
+	pi->boot_pl.ss_divider_index = 0;
+	pi->boot_pl.allow_gnb_slow = 1;
+	pi->boot_pl.force_nbp_state = 0;
+	pi->boot_pl.display_wm = 0;
+	pi->boot_pl.vce_wm = 0;
+}
+
+static int kv_force_dpm_highest(struct radeon_device *rdev)
+{
+	int ret;
+	u32 enable_mask, i;
+
+	ret = kv_dpm_get_enable_mask(rdev, &enable_mask);
+	if (ret)
+		return ret;
+
+	for (i = SMU7_MAX_LEVELS_GRAPHICS - 1; i >= 0; i--) {
+		if (enable_mask & (1 << i))
+			break;
+	}
+
+	return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i);
+}
+
+static int kv_force_dpm_lowest(struct radeon_device *rdev)
+{
+	int ret;
+	u32 enable_mask, i;
+
+	ret = kv_dpm_get_enable_mask(rdev, &enable_mask);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) {
+		if (enable_mask & (1 << i))
+			break;
+	}
+
+	return kv_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_DPM_ForceState, i);
+}
+
+static u8 kv_get_sleep_divider_id_from_clock(struct radeon_device *rdev,
+					     u32 sclk, u32 min_sclk_in_sr)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i;
+	u32 temp;
+	u32 min = (min_sclk_in_sr > KV_MINIMUM_ENGINE_CLOCK) ?
+		min_sclk_in_sr : KV_MINIMUM_ENGINE_CLOCK;
+
+	if (sclk < min)
+		return 0;
+
+	if (!pi->caps_sclk_ds)
+		return 0;
+
+	for (i = KV_MAX_DEEPSLEEP_DIVIDER_ID; i <= 0; i--) {
+		temp = sclk / sumo_get_sleep_divider_from_id(i);
+		if ((temp >= min) || (i == 0))
+			break;
+	}
+
+	return (u8)i;
+}
+
+static int kv_get_high_voltage_limit(struct radeon_device *rdev, int *limit)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+	int i;
+
+	if (table && table->count) {
+		for (i = table->count - 1; i >= 0; i--) {
+			if (pi->high_voltage_t &&
+			    (kv_convert_8bit_index_to_voltage(rdev, table->entries[i].v) <=
+			     pi->high_voltage_t)) {
+				*limit = i;
+				return 0;
+			}
+		}
+	} else {
+		struct sumo_sclk_voltage_mapping_table *table =
+			&pi->sys_info.sclk_voltage_mapping_table;
+
+		for (i = table->num_max_dpm_entries - 1; i >= 0; i--) {
+			if (pi->high_voltage_t &&
+			    (kv_convert_2bit_index_to_voltage(rdev, table->entries[i].vid_2bit) <=
+			     pi->high_voltage_t)) {
+				*limit = i;
+				return 0;
+			}
+		}
+	}
+
+	*limit = 0;
+	return 0;
+}
+
+static void kv_apply_state_adjust_rules(struct radeon_device *rdev,
+					struct radeon_ps *new_rps,
+					struct radeon_ps *old_rps)
+{
+	struct kv_ps *ps = kv_get_ps(new_rps);
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 min_sclk = 10000; /* ??? */
+	u32 sclk, mclk = 0;
+	int i, limit;
+	bool force_high;
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+	u32 stable_p_state_sclk = 0;
+	struct radeon_clock_and_voltage_limits *max_limits =
+		&rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+
+	mclk = max_limits->mclk;
+	sclk = min_sclk;
+
+	if (pi->caps_stable_p_state) {
+		stable_p_state_sclk = (max_limits->sclk * 75) / 100;
+
+		for (i = table->count - 1; i >= 0; i++) {
+			if (stable_p_state_sclk >= table->entries[i].clk) {
+				stable_p_state_sclk = table->entries[i].clk;
+				break;
+			}
+		}
+
+		if (i > 0)
+			stable_p_state_sclk = table->entries[0].clk;
+
+		sclk = stable_p_state_sclk;
+	}
+
+	ps->need_dfs_bypass = true;
+
+	for (i = 0; i < ps->num_levels; i++) {
+		if (ps->levels[i].sclk < sclk)
+			ps->levels[i].sclk = sclk;
+	}
+
+	if (table && table->count) {
+		for (i = 0; i < ps->num_levels; i++) {
+			if (pi->high_voltage_t &&
+			    (pi->high_voltage_t <
+			     kv_convert_8bit_index_to_voltage(rdev, ps->levels[i].vddc_index))) {
+				kv_get_high_voltage_limit(rdev, &limit);
+				ps->levels[i].sclk = table->entries[limit].clk;
+			}
+		}
+	} else {
+		struct sumo_sclk_voltage_mapping_table *table =
+			&pi->sys_info.sclk_voltage_mapping_table;
+
+		for (i = 0; i < ps->num_levels; i++) {
+			if (pi->high_voltage_t &&
+			    (pi->high_voltage_t <
+			     kv_convert_8bit_index_to_voltage(rdev, ps->levels[i].vddc_index))) {
+				kv_get_high_voltage_limit(rdev, &limit);
+				ps->levels[i].sclk = table->entries[limit].sclk_frequency;
+			}
+		}
+	}
+
+	if (pi->caps_stable_p_state) {
+		for (i = 0; i < ps->num_levels; i++) {
+			ps->levels[i].sclk = stable_p_state_sclk;
+		}
+	}
+
+	pi->video_start = new_rps->dclk || new_rps->vclk;
+
+	if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) ==
+	    ATOM_PPLIB_CLASSIFICATION_UI_BATTERY)
+		pi->battery_state = true;
+	else
+		pi->battery_state = false;
+
+	if (rdev->family == CHIP_KABINI) {
+		ps->dpm0_pg_nb_ps_lo = 0x1;
+		ps->dpm0_pg_nb_ps_hi = 0x0;
+		ps->dpmx_nb_ps_lo = 0x1;
+		ps->dpmx_nb_ps_hi = 0x0;
+	} else {
+		ps->dpm0_pg_nb_ps_lo = 0x1;
+		ps->dpm0_pg_nb_ps_hi = 0x0;
+		ps->dpmx_nb_ps_lo = 0x2;
+		ps->dpmx_nb_ps_hi = 0x1;
+
+		if (pi->sys_info.nb_dpm_enable && pi->battery_state) {
+			force_high = (mclk >= pi->sys_info.nbp_memory_clock[3]) ||
+				pi->video_start || (rdev->pm.dpm.new_active_crtc_count >= 3) ||
+				pi->disable_nb_ps3_in_battery;
+			ps->dpm0_pg_nb_ps_lo = force_high ? 0x2 : 0x3;
+			ps->dpm0_pg_nb_ps_hi = 0x2;
+			ps->dpmx_nb_ps_lo = force_high ? 0x2 : 0x3;
+			ps->dpmx_nb_ps_hi = 0x2;
+		}
+	}
+}
+
+static void kv_dpm_power_level_enabled_for_throttle(struct radeon_device *rdev,
+						    u32 index, bool enable)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	pi->graphics_level[index].EnabledForThrottle = enable ? 1 : 0;
+}
+
+static int kv_calculate_ds_divider(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 sclk_in_sr = 10000; /* ??? */
+	u32 i;
+
+	if (pi->lowest_valid > pi->highest_valid)
+		return -EINVAL;
+
+	for (i = pi->lowest_valid; i <= pi->highest_valid; i++) {
+		pi->graphics_level[i].DeepSleepDivId =
+			kv_get_sleep_divider_id_from_clock(rdev,
+							   be32_to_cpu(pi->graphics_level[i].SclkFrequency),
+							   sclk_in_sr);
+	}
+	return 0;
+}
+
+static int kv_calculate_nbps_level_settings(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i;
+	bool force_high;
+	struct radeon_clock_and_voltage_limits *max_limits =
+		&rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+	u32 mclk = max_limits->mclk;
+
+	if (pi->lowest_valid > pi->highest_valid)
+		return -EINVAL;
+
+	if (rdev->family == CHIP_KABINI) {
+		for (i = pi->lowest_valid; i <= pi->highest_valid; i++) {
+			pi->graphics_level[i].GnbSlow = 1;
+			pi->graphics_level[i].ForceNbPs1 = 0;
+			pi->graphics_level[i].UpH = 0;
+		}
+
+		if (!pi->sys_info.nb_dpm_enable)
+			return 0;
+
+		force_high = ((mclk >= pi->sys_info.nbp_memory_clock[3]) ||
+			      (rdev->pm.dpm.new_active_crtc_count >= 3) || pi->video_start);
+
+		if (force_high) {
+			for (i = pi->lowest_valid; i <= pi->highest_valid; i++)
+				pi->graphics_level[i].GnbSlow = 0;
+		} else {
+			if (pi->battery_state)
+				pi->graphics_level[0].ForceNbPs1 = 1;
+
+			pi->graphics_level[1].GnbSlow = 0;
+			pi->graphics_level[2].GnbSlow = 0;
+			pi->graphics_level[3].GnbSlow = 0;
+			pi->graphics_level[4].GnbSlow = 0;
+		}
+	} else {
+		for (i = pi->lowest_valid; i <= pi->highest_valid; i++) {
+			pi->graphics_level[i].GnbSlow = 1;
+			pi->graphics_level[i].ForceNbPs1 = 0;
+			pi->graphics_level[i].UpH = 0;
+		}
+
+		if (pi->sys_info.nb_dpm_enable && pi->battery_state) {
+			pi->graphics_level[pi->lowest_valid].UpH = 0x28;
+			pi->graphics_level[pi->lowest_valid].GnbSlow = 0;
+			if (pi->lowest_valid != pi->highest_valid)
+				pi->graphics_level[pi->lowest_valid].ForceNbPs1 = 1;
+		}
+	}
+	return 0;
+}
+
+static int kv_calculate_dpm_settings(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i;
+
+	if (pi->lowest_valid > pi->highest_valid)
+		return -EINVAL;
+
+	for (i = pi->lowest_valid; i <= pi->highest_valid; i++)
+		pi->graphics_level[i].DisplayWatermark = (i == pi->highest_valid) ? 1 : 0;
+
+	return 0;
+}
+
+static void kv_init_graphics_levels(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i;
+	struct radeon_clock_voltage_dependency_table *table =
+		&rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk;
+
+	if (table && table->count) {
+		u32 vid_2bit;
+
+		pi->graphics_dpm_level_count = 0;
+		for (i = 0; i < table->count; i++) {
+			if (pi->high_voltage_t &&
+			    (pi->high_voltage_t <
+			     kv_convert_8bit_index_to_voltage(rdev, table->entries[i].v)))
+				break;
+
+			kv_set_divider_value(rdev, i, table->entries[i].clk);
+			vid_2bit = sumo_convert_vid7_to_vid2(rdev,
+							     &pi->sys_info.vid_mapping_table,
+							     table->entries[i].v);
+			kv_set_vid(rdev, i, vid_2bit);
+			kv_set_at(rdev, i, pi->at[i]);
+			kv_dpm_power_level_enabled_for_throttle(rdev, i, true);
+			pi->graphics_dpm_level_count++;
+		}
+	} else {
+		struct sumo_sclk_voltage_mapping_table *table =
+			&pi->sys_info.sclk_voltage_mapping_table;
+
+		pi->graphics_dpm_level_count = 0;
+		for (i = 0; i < table->num_max_dpm_entries; i++) {
+			if (pi->high_voltage_t &&
+			    pi->high_voltage_t <
+			    kv_convert_2bit_index_to_voltage(rdev, table->entries[i].vid_2bit))
+				break;
+
+			kv_set_divider_value(rdev, i, table->entries[i].sclk_frequency);
+			kv_set_vid(rdev, i, table->entries[i].vid_2bit);
+			kv_set_at(rdev, i, pi->at[i]);
+			kv_dpm_power_level_enabled_for_throttle(rdev, i, true);
+			pi->graphics_dpm_level_count++;
+		}
+	}
+
+	for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++)
+		kv_dpm_power_level_enable(rdev, i, false);
+}
+
+static void kv_enable_new_levels(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i;
+
+	for (i = 0; i < SMU7_MAX_LEVELS_GRAPHICS; i++) {
+		if (i >= pi->lowest_valid && i <= pi->highest_valid)
+			kv_dpm_power_level_enable(rdev, i, true);
+	}
+}
+
+static int kv_set_enabled_levels(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 i, new_mask = 0;
+
+	for (i = pi->lowest_valid; i <= pi->highest_valid; i++)
+		new_mask |= (1 << i);
+
+	return kv_send_msg_to_smc_with_parameter(rdev,
+						 PPSMC_MSG_SCLKDPM_SetEnabledMask,
+						 new_mask);
+}
+
+static void kv_program_nbps_index_settings(struct radeon_device *rdev,
+					   struct radeon_ps *new_rps)
+{
+	struct kv_ps *new_ps = kv_get_ps(new_rps);
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 nbdpmconfig1;
+
+	if (rdev->family == CHIP_KABINI)
+		return;
+
+	if (pi->sys_info.nb_dpm_enable) {
+		nbdpmconfig1 = RREG32_SMC(NB_DPM_CONFIG_1);
+		nbdpmconfig1 &= ~(Dpm0PgNbPsLo_MASK | Dpm0PgNbPsHi_MASK |
+				  DpmXNbPsLo_MASK | DpmXNbPsHi_MASK);
+		nbdpmconfig1 |= (Dpm0PgNbPsLo(new_ps->dpm0_pg_nb_ps_lo) |
+				 Dpm0PgNbPsHi(new_ps->dpm0_pg_nb_ps_hi) |
+				 DpmXNbPsLo(new_ps->dpmx_nb_ps_lo) |
+				 DpmXNbPsHi(new_ps->dpmx_nb_ps_hi));
+		WREG32_SMC(NB_DPM_CONFIG_1, nbdpmconfig1);
+	}
+}
+
+static int kv_set_thermal_temperature_range(struct radeon_device *rdev,
+					    int min_temp, int max_temp)
+{
+	int low_temp = 0 * 1000;
+	int high_temp = 255 * 1000;
+	u32 tmp;
+
+	if (low_temp < min_temp)
+		low_temp = min_temp;
+	if (high_temp > max_temp)
+		high_temp = max_temp;
+	if (high_temp < low_temp) {
+		DRM_ERROR("invalid thermal range: %d - %d\n", low_temp, high_temp);
+		return -EINVAL;
+	}
+
+	tmp = RREG32_SMC(CG_THERMAL_INT_CTRL);
+	tmp &= ~(DIG_THERM_INTH_MASK | DIG_THERM_INTL_MASK);
+	tmp |= (DIG_THERM_INTH(49 + (high_temp / 1000)) |
+		DIG_THERM_INTL(49 + (low_temp / 1000)));
+	WREG32_SMC(CG_THERMAL_INT_CTRL, tmp);
+
+	rdev->pm.dpm.thermal.min_temp = low_temp;
+	rdev->pm.dpm.thermal.max_temp = high_temp;
+
+	return 0;
+}
+
+union igp_info {
+	struct _ATOM_INTEGRATED_SYSTEM_INFO info;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V5 info_5;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7;
+	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_8 info_8;
+};
+
+static int kv_parse_sys_info_table(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+	union igp_info *igp_info;
+	u8 frev, crev;
+	u16 data_offset;
+	int i;
+
+	if (atom_parse_data_header(mode_info->atom_context, index, NULL,
+				   &frev, &crev, &data_offset)) {
+		igp_info = (union igp_info *)(mode_info->atom_context->bios +
+					      data_offset);
+
+		if (crev != 8) {
+			DRM_ERROR("Unsupported IGP table: %d %d\n", frev, crev);
+			return -EINVAL;
+		}
+		pi->sys_info.bootup_sclk = le32_to_cpu(igp_info->info_8.ulBootUpEngineClock);
+		pi->sys_info.bootup_uma_clk = le32_to_cpu(igp_info->info_8.ulBootUpUMAClock);
+		pi->sys_info.bootup_nb_voltage_index =
+			le16_to_cpu(igp_info->info_8.usBootUpNBVoltage);
+		if (igp_info->info_8.ucHtcTmpLmt == 0)
+			pi->sys_info.htc_tmp_lmt = 203;
+		else
+			pi->sys_info.htc_tmp_lmt = igp_info->info_8.ucHtcTmpLmt;
+		if (igp_info->info_8.ucHtcHystLmt == 0)
+			pi->sys_info.htc_hyst_lmt = 5;
+		else
+			pi->sys_info.htc_hyst_lmt = igp_info->info_8.ucHtcHystLmt;
+		if (pi->sys_info.htc_tmp_lmt <= pi->sys_info.htc_hyst_lmt) {
+			DRM_ERROR("The htcTmpLmt should be larger than htcHystLmt.\n");
+		}
+
+		if (le32_to_cpu(igp_info->info_8.ulSystemConfig) & (1 << 3))
+			pi->sys_info.nb_dpm_enable = true;
+		else
+			pi->sys_info.nb_dpm_enable = false;
+
+		for (i = 0; i < KV_NUM_NBPSTATES; i++) {
+			pi->sys_info.nbp_memory_clock[i] =
+				le32_to_cpu(igp_info->info_8.ulNbpStateMemclkFreq[i]);
+			pi->sys_info.nbp_n_clock[i] =
+				le32_to_cpu(igp_info->info_8.ulNbpStateNClkFreq[i]);
+		}
+		if (le32_to_cpu(igp_info->info_8.ulGPUCapInfo) &
+		    SYS_INFO_GPUCAPS__ENABEL_DFS_BYPASS)
+			pi->caps_enable_dfs_bypass = true;
+
+		sumo_construct_sclk_voltage_mapping_table(rdev,
+							  &pi->sys_info.sclk_voltage_mapping_table,
+							  igp_info->info_8.sAvail_SCLK);
+
+		sumo_construct_vid_mapping_table(rdev,
+						 &pi->sys_info.vid_mapping_table,
+						 igp_info->info_8.sAvail_SCLK);
+
+		kv_construct_max_power_limits_table(rdev,
+						    &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac);
+	}
+	return 0;
+}
+
+union power_info {
+	struct _ATOM_POWERPLAY_INFO info;
+	struct _ATOM_POWERPLAY_INFO_V2 info_2;
+	struct _ATOM_POWERPLAY_INFO_V3 info_3;
+	struct _ATOM_PPLIB_POWERPLAYTABLE pplib;
+	struct _ATOM_PPLIB_POWERPLAYTABLE2 pplib2;
+	struct _ATOM_PPLIB_POWERPLAYTABLE3 pplib3;
+};
+
+union pplib_clock_info {
+	struct _ATOM_PPLIB_R600_CLOCK_INFO r600;
+	struct _ATOM_PPLIB_RS780_CLOCK_INFO rs780;
+	struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO evergreen;
+	struct _ATOM_PPLIB_SUMO_CLOCK_INFO sumo;
+};
+
+union pplib_power_state {
+	struct _ATOM_PPLIB_STATE v1;
+	struct _ATOM_PPLIB_STATE_V2 v2;
+};
+
+static void kv_patch_boot_state(struct radeon_device *rdev,
+				struct kv_ps *ps)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	ps->num_levels = 1;
+	ps->levels[0] = pi->boot_pl;
+}
+
+static void kv_parse_pplib_non_clock_info(struct radeon_device *rdev,
+					  struct radeon_ps *rps,
+					  struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info,
+					  u8 table_rev)
+{
+	struct kv_ps *ps = kv_get_ps(rps);
+
+	rps->caps = le32_to_cpu(non_clock_info->ulCapsAndSettings);
+	rps->class = le16_to_cpu(non_clock_info->usClassification);
+	rps->class2 = le16_to_cpu(non_clock_info->usClassification2);
+
+	if (ATOM_PPLIB_NONCLOCKINFO_VER1 < table_rev) {
+		rps->vclk = le32_to_cpu(non_clock_info->ulVCLK);
+		rps->dclk = le32_to_cpu(non_clock_info->ulDCLK);
+	} else {
+		rps->vclk = 0;
+		rps->dclk = 0;
+	}
+
+	if (rps->class & ATOM_PPLIB_CLASSIFICATION_BOOT) {
+		rdev->pm.dpm.boot_ps = rps;
+		kv_patch_boot_state(rdev, ps);
+	}
+	if (rps->class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE)
+		rdev->pm.dpm.uvd_ps = rps;
+}
+
+static void kv_parse_pplib_clock_info(struct radeon_device *rdev,
+				      struct radeon_ps *rps, int index,
+					union pplib_clock_info *clock_info)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct kv_ps *ps = kv_get_ps(rps);
+	struct kv_pl *pl = &ps->levels[index];
+	u32 sclk;
+
+	sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow);
+	sclk |= clock_info->sumo.ucEngineClockHigh << 16;
+	pl->sclk = sclk;
+	pl->vddc_index = clock_info->sumo.vddcIndex;
+
+	ps->num_levels = index + 1;
+
+	if (pi->caps_sclk_ds) {
+		pl->ds_divider_index = 5;
+		pl->ss_divider_index = 5;
+	}
+}
+
+static int kv_parse_power_table(struct radeon_device *rdev)
+{
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	struct _ATOM_PPLIB_NONCLOCK_INFO *non_clock_info;
+	union pplib_power_state *power_state;
+	int i, j, k, non_clock_array_index, clock_array_index;
+	union pplib_clock_info *clock_info;
+	struct _StateArray *state_array;
+	struct _ClockInfoArray *clock_info_array;
+	struct _NonClockInfoArray *non_clock_info_array;
+	union power_info *power_info;
+	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
+        u16 data_offset;
+	u8 frev, crev;
+	u8 *power_state_offset;
+	struct kv_ps *ps;
+
+	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
+				   &frev, &crev, &data_offset))
+		return -EINVAL;
+	power_info = (union power_info *)(mode_info->atom_context->bios + data_offset);
+
+	state_array = (struct _StateArray *)
+		(mode_info->atom_context->bios + data_offset +
+		 le16_to_cpu(power_info->pplib.usStateArrayOffset));
+	clock_info_array = (struct _ClockInfoArray *)
+		(mode_info->atom_context->bios + data_offset +
+		 le16_to_cpu(power_info->pplib.usClockInfoArrayOffset));
+	non_clock_info_array = (struct _NonClockInfoArray *)
+		(mode_info->atom_context->bios + data_offset +
+		 le16_to_cpu(power_info->pplib.usNonClockInfoArrayOffset));
+
+	rdev->pm.dpm.ps = kzalloc(sizeof(struct radeon_ps) *
+				  state_array->ucNumEntries, GFP_KERNEL);
+	if (!rdev->pm.dpm.ps)
+		return -ENOMEM;
+	power_state_offset = (u8 *)state_array->states;
+	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
+	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
+	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
+	for (i = 0; i < state_array->ucNumEntries; i++) {
+		u8 *idx;
+		power_state = (union pplib_power_state *)power_state_offset;
+		non_clock_array_index = power_state->v2.nonClockInfoIndex;
+		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
+			&non_clock_info_array->nonClockInfo[non_clock_array_index];
+		if (!rdev->pm.power_state[i].clock_info)
+			return -EINVAL;
+		ps = kzalloc(sizeof(struct kv_ps), GFP_KERNEL);
+		if (ps == NULL) {
+			kfree(rdev->pm.dpm.ps);
+			return -ENOMEM;
+		}
+		rdev->pm.dpm.ps[i].ps_priv = ps;
+		k = 0;
+		idx = (u8 *)&power_state->v2.clockInfoIndex[0];
+		for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) {
+			clock_array_index = idx[j];
+			if (clock_array_index >= clock_info_array->ucNumEntries)
+				continue;
+			if (k >= SUMO_MAX_HARDWARE_POWERLEVELS)
+				break;
+			clock_info = (union pplib_clock_info *)
+				((u8 *)&clock_info_array->clockInfo[0] +
+				 (clock_array_index * clock_info_array->ucEntrySize));
+			kv_parse_pplib_clock_info(rdev,
+						  &rdev->pm.dpm.ps[i], k,
+						  clock_info);
+			k++;
+		}
+		kv_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i],
+					      non_clock_info,
+					      non_clock_info_array->ucEntrySize);
+		power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
+	}
+	rdev->pm.dpm.num_ps = state_array->ucNumEntries;
+	return 0;
+}
+
+int kv_dpm_init(struct radeon_device *rdev)
+{
+	struct kv_power_info *pi;
+	int ret, i;
+
+	pi = kzalloc(sizeof(struct kv_power_info), GFP_KERNEL);
+	if (pi == NULL)
+		return -ENOMEM;
+	rdev->pm.dpm.priv = pi;
+
+	ret = r600_parse_extended_power_table(rdev);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < SUMO_MAX_HARDWARE_POWERLEVELS; i++)
+		pi->at[i] = TRINITY_AT_DFLT;
+
+        pi->sram_end = SMC_RAM_END;
+
+	if (rdev->family == CHIP_KABINI)
+		pi->high_voltage_t = 4001;
+
+	pi->enable_nb_dpm = true;
+
+	pi->caps_power_containment = true;
+	pi->caps_cac = true;
+	pi->enable_didt = false;
+	if (pi->enable_didt) {
+		pi->caps_sq_ramping = true;
+		pi->caps_db_ramping = true;
+		pi->caps_td_ramping = true;
+		pi->caps_tcp_ramping = true;
+	}
+
+	pi->caps_sclk_ds = true;
+	pi->enable_auto_thermal_throttling = true;
+	pi->disable_nb_ps3_in_battery = false;
+	pi->bapm_enable = true;
+	pi->voltage_drop_t = 0;
+	pi->caps_sclk_throttle_low_notification = false;
+	pi->caps_fps = false; /* true? */
+	pi->caps_uvd_pg = true;
+	pi->caps_uvd_dpm = true;
+	pi->caps_vce_pg = false;
+	pi->caps_samu_pg = false;
+	pi->caps_acp_pg = false;
+	pi->caps_stable_p_state = false;
+
+	ret = kv_parse_sys_info_table(rdev);
+	if (ret)
+		return ret;
+
+	kv_patch_voltage_values(rdev);
+	kv_construct_boot_state(rdev);
+
+	ret = kv_parse_power_table(rdev);
+	if (ret)
+		return ret;
+
+	pi->enable_dpm = true;
+
+	return 0;
+}
+
+void kv_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
+						    struct seq_file *m)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	u32 current_index =
+		(RREG32_SMC(TARGET_AND_CURRENT_PROFILE_INDEX) & CURR_SCLK_INDEX_MASK) >>
+		CURR_SCLK_INDEX_SHIFT;
+	u32 sclk, tmp;
+	u16 vddc;
+
+	if (current_index >= SMU__NUM_SCLK_DPM_STATE) {
+		seq_printf(m, "invalid dpm profile %d\n", current_index);
+	} else {
+		sclk = be32_to_cpu(pi->graphics_level[current_index].SclkFrequency);
+		tmp = (RREG32_SMC(SMU_VOLTAGE_STATUS) & SMU_VOLTAGE_CURRENT_LEVEL_MASK) >>
+			SMU_VOLTAGE_CURRENT_LEVEL_SHIFT;
+		vddc = kv_convert_8bit_index_to_voltage(rdev, (u16)tmp);
+		seq_printf(m, "power level %d    sclk: %u vddc: %u\n",
+			   current_index, sclk, vddc);
+	}
+}
+
+void kv_dpm_print_power_state(struct radeon_device *rdev,
+			      struct radeon_ps *rps)
+{
+	int i;
+	struct kv_ps *ps = kv_get_ps(rps);
+
+	r600_dpm_print_class_info(rps->class, rps->class2);
+	r600_dpm_print_cap_info(rps->caps);
+	printk("\tuvd    vclk: %d dclk: %d\n", rps->vclk, rps->dclk);
+	for (i = 0; i < ps->num_levels; i++) {
+		struct kv_pl *pl = &ps->levels[i];
+		printk("\t\tpower level %d    sclk: %u vddc: %u\n",
+		       i, pl->sclk,
+		       kv_convert_8bit_index_to_voltage(rdev, pl->vddc_index));
+	}
+	r600_dpm_print_ps_status(rdev, rps);
+}
+
+void kv_dpm_fini(struct radeon_device *rdev)
+{
+	int i;
+
+	for (i = 0; i < rdev->pm.dpm.num_ps; i++) {
+		kfree(rdev->pm.dpm.ps[i].ps_priv);
+	}
+	kfree(rdev->pm.dpm.ps);
+	kfree(rdev->pm.dpm.priv);
+	r600_free_extended_power_table(rdev);
+}
+
+void kv_dpm_display_configuration_changed(struct radeon_device *rdev)
+{
+
+}
+
+u32 kv_dpm_get_sclk(struct radeon_device *rdev, bool low)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+	struct kv_ps *requested_state = kv_get_ps(&pi->requested_rps);
+
+	if (low)
+		return requested_state->levels[0].sclk;
+	else
+		return requested_state->levels[requested_state->num_levels - 1].sclk;
+}
+
+u32 kv_dpm_get_mclk(struct radeon_device *rdev, bool low)
+{
+	struct kv_power_info *pi = kv_get_pi(rdev);
+
+	return pi->sys_info.bootup_uma_clk;
+}
+
diff --git a/drivers/gpu/drm/radeon/kv_dpm.h b/drivers/gpu/drm/radeon/kv_dpm.h
new file mode 100644
index 0000000..32bb079
--- /dev/null
+++ b/drivers/gpu/drm/radeon/kv_dpm.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#ifndef __KV_DPM_H__
+#define __KV_DPM_H__
+
+#define SMU__NUM_SCLK_DPM_STATE  8
+#define SMU__NUM_MCLK_DPM_LEVELS 4
+#define SMU__NUM_LCLK_DPM_LEVELS 8
+#define SMU__NUM_PCIE_DPM_LEVELS 0 /* ??? */
+#include "smu7_fusion.h"
+#include "trinity_dpm.h"
+#include "ppsmc.h"
+
+#define KV_NUM_NBPSTATES   4
+
+enum kv_pt_config_reg_type {
+	KV_CONFIGREG_MMR = 0,
+	KV_CONFIGREG_SMC_IND,
+	KV_CONFIGREG_DIDT_IND,
+	KV_CONFIGREG_CACHE,
+	KV_CONFIGREG_MAX
+};
+
+struct kv_pt_config_reg {
+	u32 offset;
+	u32 mask;
+	u32 shift;
+	u32 value;
+	enum kv_pt_config_reg_type type;
+};
+
+struct kv_lcac_config_values {
+	u32 block_id;
+	u32 signal_id;
+	u32 t;
+};
+
+struct kv_lcac_config_reg {
+	u32 cntl;
+	u32 block_mask;
+	u32 block_shift;
+	u32 signal_mask;
+	u32 signal_shift;
+	u32 t_mask;
+	u32 t_shift;
+	u32 enable_mask;
+	u32 enable_shift;
+};
+
+struct kv_pl {
+	u32 sclk;
+	u8 vddc_index;
+	u8 ds_divider_index;
+	u8 ss_divider_index;
+	u8 allow_gnb_slow;
+	u8 force_nbp_state;
+	u8 display_wm;
+	u8 vce_wm;
+};
+
+struct kv_ps {
+	struct kv_pl levels[SUMO_MAX_HARDWARE_POWERLEVELS];
+	u32 num_levels;
+	bool need_dfs_bypass;
+	u8 dpm0_pg_nb_ps_lo;
+	u8 dpm0_pg_nb_ps_hi;
+	u8 dpmx_nb_ps_lo;
+	u8 dpmx_nb_ps_hi;
+};
+
+struct kv_sys_info {
+	u32 bootup_uma_clk;
+	u32 bootup_sclk;
+	u32 dentist_vco_freq;
+	u32 nb_dpm_enable;
+	u32 nbp_memory_clock[KV_NUM_NBPSTATES];
+	u32 nbp_n_clock[KV_NUM_NBPSTATES];
+	u16 bootup_nb_voltage_index;
+	u8 htc_tmp_lmt;
+	u8 htc_hyst_lmt;
+	struct sumo_sclk_voltage_mapping_table sclk_voltage_mapping_table;
+	struct sumo_vid_mapping_table vid_mapping_table;
+	u32 uma_channel_number;
+};
+
+struct kv_power_info {
+	u32 at[SUMO_MAX_HARDWARE_POWERLEVELS];
+	u32 voltage_drop_t;
+	struct kv_sys_info sys_info;
+	struct kv_pl boot_pl;
+	bool enable_nb_ps_policy;
+	bool disable_nb_ps3_in_battery;
+	bool video_start;
+	bool battery_state;
+	u32 lowest_valid;
+	u32 highest_valid;
+	u16 high_voltage_t;
+	bool cac_enabled;
+	bool bapm_enable;
+	/* smc offsets */
+	u32 sram_end;
+	u32 dpm_table_start;
+	u32 soft_regs_start;
+	/* dpm SMU tables */
+	u8 graphics_dpm_level_count;
+	u8 uvd_level_count;
+	u8 vce_level_count;
+	u8 acp_level_count;
+	u8 samu_level_count;
+	u16 fps_high_t;
+	SMU7_Fusion_GraphicsLevel graphics_level[SMU__NUM_SCLK_DPM_STATE];
+	SMU7_Fusion_ACPILevel acpi_level;
+	SMU7_Fusion_UvdLevel uvd_level[SMU7_MAX_LEVELS_UVD];
+	SMU7_Fusion_ExtClkLevel vce_level[SMU7_MAX_LEVELS_VCE];
+	SMU7_Fusion_ExtClkLevel acp_level[SMU7_MAX_LEVELS_ACP];
+	SMU7_Fusion_ExtClkLevel samu_level[SMU7_MAX_LEVELS_SAMU];
+	u8 uvd_boot_level;
+	u8 vce_boot_level;
+	u8 acp_boot_level;
+	u8 samu_boot_level;
+	u8 uvd_interval;
+	u8 vce_interval;
+	u8 acp_interval;
+	u8 samu_interval;
+	u8 graphics_boot_level;
+	u8 graphics_interval;
+	u8 graphics_therm_throttle_enable;
+	u8 graphics_voltage_change_enable;
+	u8 graphics_clk_slow_enable;
+	u8 graphics_clk_slow_divider;
+	u8 fps_low_t;
+	u32 low_sclk_interrupt_t;
+	bool uvd_power_gated;
+	bool vce_power_gated;
+	bool acp_power_gated;
+	bool samu_power_gated;
+	bool nb_dpm_enabled;
+	/* flags */
+	bool enable_didt;
+	bool enable_dpm;
+	bool enable_auto_thermal_throttling;
+	bool enable_nb_dpm;
+	/* caps */
+	bool caps_cac;
+	bool caps_power_containment;
+	bool caps_sq_ramping;
+	bool caps_db_ramping;
+	bool caps_td_ramping;
+	bool caps_tcp_ramping;
+	bool caps_sclk_throttle_low_notification;
+	bool caps_fps;
+	bool caps_uvd_dpm;
+	bool caps_uvd_pg;
+	bool caps_vce_pg;
+	bool caps_samu_pg;
+	bool caps_acp_pg;
+	bool caps_stable_p_state;
+	bool caps_enable_dfs_bypass;
+	bool caps_sclk_ds;
+	struct radeon_ps current_rps;
+	struct kv_ps current_ps;
+	struct radeon_ps requested_rps;
+	struct kv_ps requested_ps;
+};
+
+
+/* kv_smc.c */
+int kv_notify_message_to_smu(struct radeon_device *rdev, u32 id);
+int kv_dpm_get_enable_mask(struct radeon_device *rdev, u32 *enable_mask);
+int kv_send_msg_to_smc_with_parameter(struct radeon_device *rdev,
+				      PPSMC_Msg msg, u32 parameter);
+int kv_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address,
+			   u32 *value, u32 limit);
+int kv_smc_dpm_enable(struct radeon_device *rdev, bool enable);
+int kv_copy_bytes_to_smc(struct radeon_device *rdev,
+			 u32 smc_start_address,
+			 const u8 *src, u32 byte_count, u32 limit);
+
+#endif
diff --git a/drivers/gpu/drm/radeon/kv_smc.c b/drivers/gpu/drm/radeon/kv_smc.c
new file mode 100644
index 0000000..34a226d
--- /dev/null
+++ b/drivers/gpu/drm/radeon/kv_smc.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+
+#include "drmP.h"
+#include "radeon.h"
+#include "cikd.h"
+#include "kv_dpm.h"
+
+int kv_notify_message_to_smu(struct radeon_device *rdev, u32 id)
+{
+	u32 i;
+	u32 tmp = 0;
+
+	WREG32(SMC_MESSAGE_0, id & SMC_MSG_MASK);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		if ((RREG32(SMC_RESP_0) & SMC_RESP_MASK) != 0)
+			break;
+		udelay(1);
+	}
+	tmp = RREG32(SMC_RESP_0) & SMC_RESP_MASK;
+
+	if (tmp != 1) {
+		if (tmp == 0xFF)
+			return -EINVAL;
+		else if (tmp == 0xFE)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+int kv_dpm_get_enable_mask(struct radeon_device *rdev, u32 *enable_mask)
+{
+	int ret;
+
+	ret = kv_notify_message_to_smu(rdev, PPSMC_MSG_SCLKDPM_GetEnabledMask);
+
+	if (ret == 0)
+		*enable_mask = RREG32_SMC(SMC_SYSCON_MSG_ARG_0);
+
+	return ret;
+}
+
+int kv_send_msg_to_smc_with_parameter(struct radeon_device *rdev,
+				      PPSMC_Msg msg, u32 parameter)
+{
+
+	WREG32(SMC_MSG_ARG_0, parameter);
+
+	return kv_notify_message_to_smu(rdev, msg);
+}
+
+static int kv_set_smc_sram_address(struct radeon_device *rdev,
+				   u32 smc_address, u32 limit)
+{
+	if (smc_address & 3)
+		return -EINVAL;
+	if ((smc_address + 3) > limit)
+		return -EINVAL;
+
+	WREG32(SMC_IND_INDEX_0, smc_address);
+	WREG32_P(SMC_IND_ACCESS_CNTL, 0, ~AUTO_INCREMENT_IND_0);
+
+	return 0;
+}
+
+int kv_read_smc_sram_dword(struct radeon_device *rdev, u32 smc_address,
+			   u32 *value, u32 limit)
+{
+	int ret;
+
+	ret = kv_set_smc_sram_address(rdev, smc_address, limit);
+	if (ret)
+		return ret;
+
+	*value = RREG32(SMC_IND_DATA_0);
+	return 0;
+}
+
+int kv_smc_dpm_enable(struct radeon_device *rdev, bool enable)
+{
+	if (enable)
+		return kv_notify_message_to_smu(rdev, PPSMC_MSG_DPM_Enable);
+	else
+		return kv_notify_message_to_smu(rdev, PPSMC_MSG_DPM_Disable);
+}
+
+int kv_copy_bytes_to_smc(struct radeon_device *rdev,
+			 u32 smc_start_address,
+			 const u8 *src, u32 byte_count, u32 limit)
+{
+	int ret;
+	u32 data, original_data, addr, extra_shift, t_byte, count, mask;
+
+	if ((smc_start_address + byte_count) > limit)
+		return -EINVAL;
+
+	addr = smc_start_address;
+	t_byte = addr & 3;
+
+	/* RMW for the initial bytes */
+	if  (t_byte != 0) {
+		addr -= t_byte;
+
+		ret = kv_set_smc_sram_address(rdev, addr, limit);
+		if (ret)
+			return ret;
+
+		original_data = RREG32(SMC_IND_DATA_0);
+
+		data = 0;
+		mask = 0;
+		count = 4;
+		while (count > 0) {
+			if (t_byte > 0) {
+				mask = (mask << 8) | 0xff;
+				t_byte--;
+			} else if (byte_count > 0) {
+				data = (data << 8) + *src++;
+				byte_count--;
+				mask <<= 8;
+			} else {
+				data <<= 8;
+				mask = (mask << 8) | 0xff;
+			}
+			count--;
+		}
+
+		data |= original_data & mask;
+
+		ret = kv_set_smc_sram_address(rdev, addr, limit);
+		if (ret)
+			return ret;
+
+		WREG32(SMC_IND_DATA_0, data);
+
+		addr += 4;
+	}
+
+	while (byte_count >= 4) {
+		/* SMC address space is BE */
+		data = (src[0] << 24) + (src[1] << 16) + (src[2] << 8) + src[3];
+
+		ret = kv_set_smc_sram_address(rdev, addr, limit);
+		if (ret)
+			return ret;
+
+		WREG32(SMC_IND_DATA_0, data);
+
+		src += 4;
+		byte_count -= 4;
+		addr += 4;
+	}
+
+	/* RMW for the final bytes */
+	if (byte_count > 0) {
+		data = 0;
+
+		ret = kv_set_smc_sram_address(rdev, addr, limit);
+		if (ret)
+			return ret;
+
+		original_data= RREG32(SMC_IND_DATA_0);
+
+		extra_shift = 8 * (4 - byte_count);
+
+		while (byte_count > 0) {
+			/* SMC address space is BE */
+			data = (data << 8) + *src++;
+			byte_count--;
+		}
+
+		data <<= extra_shift;
+
+		data |= (original_data & ~((~0UL) << extra_shift));
+
+		ret = kv_set_smc_sram_address(rdev, addr, limit);
+		if (ret)
+			return ret;
+
+		WREG32(SMC_IND_DATA_0, data);
+	}
+	return 0;
+}
+
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 56bd4f3..93c1f9e 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -35,7 +35,7 @@
 #include "radeon_ucode.h"
 #include "clearstate_cayman.h"
 
-static u32 tn_rlc_save_restore_register_list[] =
+static const u32 tn_rlc_save_restore_register_list[] =
 {
 	0x98fc,
 	0x98f0,
@@ -160,7 +160,6 @@
 	0x9830,
 	0x802c,
 };
-static u32 tn_rlc_save_restore_register_list_size = ARRAY_SIZE(tn_rlc_save_restore_register_list);
 
 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
@@ -175,6 +174,11 @@
 extern void evergreen_program_aspm(struct radeon_device *rdev);
 extern void sumo_rlc_fini(struct radeon_device *rdev);
 extern int sumo_rlc_init(struct radeon_device *rdev);
+extern void cayman_dma_vm_set_page(struct radeon_device *rdev,
+				   struct radeon_ib *ib,
+				   uint64_t pe,
+				   uint64_t addr, unsigned count,
+				   uint32_t incr, uint32_t flags);
 
 /* Firmware Names */
 MODULE_FIRMWARE("radeon/BARTS_pfp.bin");
@@ -794,9 +798,13 @@
 	if ((rdev->family >= CHIP_BARTS) && (rdev->family <= CHIP_CAYMAN)) {
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
-		if (err)
-			goto out;
-		if (rdev->smc_fw->size != smc_req_size) {
+		if (err) {
+			printk(KERN_ERR
+			       "smc: error loading firmware \"%s\"\n",
+			       fw_name);
+			release_firmware(rdev->smc_fw);
+			rdev->smc_fw = NULL;
+		} else if (rdev->smc_fw->size != smc_req_size) {
 			printk(KERN_ERR
 			       "ni_mc: Bogus length %zu in firmware \"%s\"\n",
 			       rdev->mc_fw->size, fw_name);
@@ -1370,23 +1378,6 @@
 	radeon_ring_write(ring, 10); /* poll interval */
 }
 
-void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
-			       struct radeon_ring *ring,
-			       struct radeon_semaphore *semaphore,
-			       bool emit_wait)
-{
-	uint64_t addr = semaphore->gpu_addr;
-
-	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
-	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
-	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
-	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
-	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
-	radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
-}
-
 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
 {
 	if (enable)
@@ -1560,8 +1551,8 @@
 
 		/* Set ring buffer size */
 		ring = &rdev->ring[ridx[i]];
-		rb_cntl = drm_order(ring->ring_size / 8);
-		rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8;
+		rb_cntl = order_base_2(ring->ring_size / 8);
+		rb_cntl |= order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8;
 #ifdef __BIG_ENDIAN
 		rb_cntl |= BUF_SWAP_32BIT;
 #endif
@@ -1609,186 +1600,7 @@
 	return 0;
 }
 
-/*
- * DMA
- * Starting with R600, the GPU has an asynchronous
- * DMA engine.  The programming model is very similar
- * to the 3D engine (ring buffer, IBs, etc.), but the
- * DMA controller has it's own packet format that is
- * different form the PM4 format used by the 3D engine.
- * It supports copying data, writing embedded data,
- * solid fills, and a number of other things.  It also
- * has support for tiling/detiling of buffers.
- * Cayman and newer support two asynchronous DMA engines.
- */
-/**
- * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
- *
- * @rdev: radeon_device pointer
- * @ib: IB object to schedule
- *
- * Schedule an IB in the DMA ring (cayman-SI).
- */
-void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
-				struct radeon_ib *ib)
-{
-	struct radeon_ring *ring = &rdev->ring[ib->ring];
-
-	if (rdev->wb.enabled) {
-		u32 next_rptr = ring->wptr + 4;
-		while ((next_rptr & 7) != 5)
-			next_rptr++;
-		next_rptr += 3;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
-		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
-		radeon_ring_write(ring, next_rptr);
-	}
-
-	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
-	 * Pad as necessary with NOPs.
-	 */
-	while ((ring->wptr & 7) != 5)
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
-	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
-	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
-
-}
-
-/**
- * cayman_dma_stop - stop the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the async dma engines (cayman-SI).
- */
-void cayman_dma_stop(struct radeon_device *rdev)
-{
-	u32 rb_cntl;
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-
-	/* dma0 */
-	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
-	rb_cntl &= ~DMA_RB_ENABLE;
-	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
-
-	/* dma1 */
-	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
-	rb_cntl &= ~DMA_RB_ENABLE;
-	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
-
-	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
-	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
-}
-
-/**
- * cayman_dma_resume - setup and start the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Set up the DMA ring buffers and enable them. (cayman-SI).
- * Returns 0 for success, error for failure.
- */
-int cayman_dma_resume(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring;
-	u32 rb_cntl, dma_cntl, ib_cntl;
-	u32 rb_bufsz;
-	u32 reg_offset, wb_offset;
-	int i, r;
-
-	/* Reset dma */
-	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
-	RREG32(SRBM_SOFT_RESET);
-	udelay(50);
-	WREG32(SRBM_SOFT_RESET, 0);
-
-	for (i = 0; i < 2; i++) {
-		if (i == 0) {
-			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
-			reg_offset = DMA0_REGISTER_OFFSET;
-			wb_offset = R600_WB_DMA_RPTR_OFFSET;
-		} else {
-			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
-			reg_offset = DMA1_REGISTER_OFFSET;
-			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
-		}
-
-		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
-		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
-
-		/* Set ring buffer size in dwords */
-		rb_bufsz = drm_order(ring->ring_size / 4);
-		rb_cntl = rb_bufsz << 1;
-#ifdef __BIG_ENDIAN
-		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
-#endif
-		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
-
-		/* Initialize the ring buffer's read and write pointers */
-		WREG32(DMA_RB_RPTR + reg_offset, 0);
-		WREG32(DMA_RB_WPTR + reg_offset, 0);
-
-		/* set the wb address whether it's enabled or not */
-		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
-		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
-		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
-		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
-
-		if (rdev->wb.enabled)
-			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
-
-		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
-
-		/* enable DMA IBs */
-		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
-#ifdef __BIG_ENDIAN
-		ib_cntl |= DMA_IB_SWAP_ENABLE;
-#endif
-		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
-
-		dma_cntl = RREG32(DMA_CNTL + reg_offset);
-		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
-		WREG32(DMA_CNTL + reg_offset, dma_cntl);
-
-		ring->wptr = 0;
-		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
-
-		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
-
-		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
-
-		ring->ready = true;
-
-		r = radeon_ring_test(rdev, ring->idx, ring);
-		if (r) {
-			ring->ready = false;
-			return r;
-		}
-	}
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
-
-	return 0;
-}
-
-/**
- * cayman_dma_fini - tear down the async dma engines
- *
- * @rdev: radeon_device pointer
- *
- * Stop the async dma engines and free the rings (cayman-SI).
- */
-void cayman_dma_fini(struct radeon_device *rdev)
-{
-	cayman_dma_stop(rdev);
-	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
-	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
-}
-
-static u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
+u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev)
 {
 	u32 reset_mask = 0;
 	u32 tmp;
@@ -2041,34 +1853,6 @@
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
-/**
- * cayman_dma_is_lockup - Check if the DMA engine is locked up
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Check if the async DMA engine is locked up.
- * Returns true if the engine appears to be locked up, false if not.
- */
-bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
-	u32 mask;
-
-	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
-		mask = RADEON_RESET_DMA;
-	else
-		mask = RADEON_RESET_DMA1;
-
-	if (!(reset_mask & mask)) {
-		radeon_ring_lockup_update(ring);
-		return false;
-	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
-	return radeon_ring_test_lockup(rdev, ring);
-}
-
 static int cayman_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -2079,6 +1863,13 @@
 	/* enable aspm */
 	evergreen_program_aspm(rdev);
 
+	/* scratch needs to be initialized before MC */
+	r = r600_vram_scratch_init(rdev);
+	if (r)
+		return r;
+
+	evergreen_mc_program(rdev);
+
 	if (rdev->flags & RADEON_IS_IGP) {
 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
 			r = ni_init_microcode(rdev);
@@ -2103,27 +1894,16 @@
 		}
 	}
 
-	r = r600_vram_scratch_init(rdev);
-	if (r)
-		return r;
-
-	evergreen_mc_program(rdev);
 	r = cayman_pcie_gart_enable(rdev);
 	if (r)
 		return r;
 	cayman_gpu_init(rdev);
 
-	r = evergreen_blit_init(rdev);
-	if (r) {
-		r600_blit_fini(rdev);
-		rdev->asic->copy.copy = NULL;
-		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-	}
-
 	/* allocate rlc buffers */
 	if (rdev->flags & RADEON_IS_IGP) {
 		rdev->rlc.reg_list = tn_rlc_save_restore_register_list;
-		rdev->rlc.reg_list_size = tn_rlc_save_restore_register_list_size;
+		rdev->rlc.reg_list_size =
+			(u32)ARRAY_SIZE(tn_rlc_save_restore_register_list);
 		rdev->rlc.cs_data = cayman_cs_data;
 		r = sumo_rlc_init(rdev);
 		if (r) {
@@ -2143,7 +1923,7 @@
 		return r;
 	}
 
-	r = rv770_uvd_resume(rdev);
+	r = uvd_v2_2_resume(rdev);
 	if (!r) {
 		r = radeon_fence_driver_start_ring(rdev,
 						   R600_RING_TYPE_UVD_INDEX);
@@ -2194,7 +1974,7 @@
 
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     CP_RB0_RPTR, CP_RB0_WPTR,
-			     0, 0xfffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r)
 		return r;
 
@@ -2202,7 +1982,7 @@
 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
 			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
 			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
 	if (r)
 		return r;
 
@@ -2210,7 +1990,7 @@
 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
 			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
 			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
 	if (r)
 		return r;
 
@@ -2227,12 +2007,11 @@
 
 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
 	if (ring->ring_size) {
-		r = radeon_ring_init(rdev, ring, ring->ring_size,
-				     R600_WB_UVD_RPTR_OFFSET,
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-				     0, 0xfffff, RADEON_CP_PACKET2);
+				     RADEON_CP_PACKET2);
 		if (!r)
-			r = r600_uvd_init(rdev);
+			r = uvd_v1_0_init(rdev);
 		if (r)
 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
 	}
@@ -2249,9 +2028,15 @@
 		return r;
 	}
 
-	r = r600_audio_init(rdev);
-	if (r)
-		return r;
+	if (ASIC_IS_DCE6(rdev)) {
+		r = dce6_audio_init(rdev);
+		if (r)
+			return r;
+	} else {
+		r = r600_audio_init(rdev);
+		if (r)
+			return r;
+	}
 
 	return 0;
 }
@@ -2282,11 +2067,14 @@
 
 int cayman_suspend(struct radeon_device *rdev)
 {
-	r600_audio_fini(rdev);
+	if (ASIC_IS_DCE6(rdev))
+		dce6_audio_fini(rdev);
+	else
+		r600_audio_fini(rdev);
 	radeon_vm_manager_fini(rdev);
 	cayman_cp_enable(rdev, false);
 	cayman_dma_stop(rdev);
-	r600_uvd_rbc_stop(rdev);
+	uvd_v1_0_fini(rdev);
 	radeon_uvd_suspend(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
@@ -2408,7 +2196,6 @@
 
 void cayman_fini(struct radeon_device *rdev)
 {
-	r600_blit_fini(rdev);
 	cayman_cp_fini(rdev);
 	cayman_dma_fini(rdev);
 	r600_irq_fini(rdev);
@@ -2418,6 +2205,7 @@
 	radeon_vm_manager_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
+	uvd_v1_0_fini(rdev);
 	radeon_uvd_fini(rdev);
 	cayman_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
@@ -2678,61 +2466,7 @@
 			}
 		}
 	} else {
-		if ((flags & RADEON_VM_PAGE_SYSTEM) ||
-		    (count == 1)) {
-			while (count) {
-				ndw = count * 2;
-				if (ndw > 0xFFFFE)
-					ndw = 0xFFFFE;
-
-				/* for non-physically contiguous pages (system) */
-				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
-				ib->ptr[ib->length_dw++] = pe;
-				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-					if (flags & RADEON_VM_PAGE_SYSTEM) {
-						value = radeon_vm_map_gart(rdev, addr);
-						value &= 0xFFFFFFFFFFFFF000ULL;
-					} else if (flags & RADEON_VM_PAGE_VALID) {
-						value = addr;
-					} else {
-						value = 0;
-					}
-					addr += incr;
-					value |= r600_flags;
-					ib->ptr[ib->length_dw++] = value;
-					ib->ptr[ib->length_dw++] = upper_32_bits(value);
-				}
-			}
-			while (ib->length_dw & 0x7)
-				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
-		} else {
-			while (count) {
-				ndw = count * 2;
-				if (ndw > 0xFFFFE)
-					ndw = 0xFFFFE;
-
-				if (flags & RADEON_VM_PAGE_VALID)
-					value = addr;
-				else
-					value = 0;
-				/* for physically contiguous pages (vram) */
-				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
-				ib->ptr[ib->length_dw++] = pe; /* dst addr */
-				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
-				ib->ptr[ib->length_dw++] = 0;
-				ib->ptr[ib->length_dw++] = value; /* value */
-				ib->ptr[ib->length_dw++] = upper_32_bits(value);
-				ib->ptr[ib->length_dw++] = incr; /* increment size */
-				ib->ptr[ib->length_dw++] = 0;
-				pe += ndw * 4;
-				addr += (ndw / 2) * incr;
-				count -= ndw / 2;
-			}
-		}
-		while (ib->length_dw & 0x7)
-			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
+		cayman_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
 	}
 }
 
@@ -2766,26 +2500,3 @@
 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 	radeon_ring_write(ring, 0x0);
 }
-
-void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
-{
-	struct radeon_ring *ring = &rdev->ring[ridx];
-
-	if (vm == NULL)
-		return;
-
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
-	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
-
-	/* flush hdp cache */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
-	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
-	radeon_ring_write(ring, 1);
-
-	/* bits 0-7 are the VM contexts0-7 */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
-	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
-	radeon_ring_write(ring, 1 << vm->id);
-}
-
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
new file mode 100644
index 0000000..dd6e968
--- /dev/null
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "nid.h"
+
+u32 cayman_gpu_check_soft_reset(struct radeon_device *rdev);
+
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine.  The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has it's own packet format that is
+ * different form the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things.  It also
+ * has support for tiling/detiling of buffers.
+ * Cayman and newer support two asynchronous DMA engines.
+ */
+
+/**
+ * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (cayman-SI).
+ */
+void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
+				struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, ib->vm ? ib->vm->id : 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * cayman_dma_stop - stop the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines (cayman-SI).
+ */
+void cayman_dma_stop(struct radeon_device *rdev)
+{
+	u32 rb_cntl;
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+	/* dma0 */
+	rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl);
+
+	/* dma1 */
+	rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl);
+
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
+}
+
+/**
+ * cayman_dma_resume - setup and start the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffers and enable them. (cayman-SI).
+ * Returns 0 for success, error for failure.
+ */
+int cayman_dma_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring;
+	u32 rb_cntl, dma_cntl, ib_cntl;
+	u32 rb_bufsz;
+	u32 reg_offset, wb_offset;
+	int i, r;
+
+	/* Reset dma */
+	WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA | SOFT_RESET_DMA1);
+	RREG32(SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(SRBM_SOFT_RESET, 0);
+
+	for (i = 0; i < 2; i++) {
+		if (i == 0) {
+			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+			reg_offset = DMA0_REGISTER_OFFSET;
+			wb_offset = R600_WB_DMA_RPTR_OFFSET;
+		} else {
+			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
+			reg_offset = DMA1_REGISTER_OFFSET;
+			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
+		}
+
+		WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
+		WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
+
+		/* Set ring buffer size in dwords */
+		rb_bufsz = order_base_2(ring->ring_size / 4);
+		rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+		rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl);
+
+		/* Initialize the ring buffer's read and write pointers */
+		WREG32(DMA_RB_RPTR + reg_offset, 0);
+		WREG32(DMA_RB_WPTR + reg_offset, 0);
+
+		/* set the wb address whether it's enabled or not */
+		WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset,
+		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF);
+		WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset,
+		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
+
+		if (rdev->wb.enabled)
+			rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+		WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8);
+
+		/* enable DMA IBs */
+		ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE;
+#ifdef __BIG_ENDIAN
+		ib_cntl |= DMA_IB_SWAP_ENABLE;
+#endif
+		WREG32(DMA_IB_CNTL + reg_offset, ib_cntl);
+
+		dma_cntl = RREG32(DMA_CNTL + reg_offset);
+		dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+		WREG32(DMA_CNTL + reg_offset, dma_cntl);
+
+		ring->wptr = 0;
+		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);
+
+		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
+
+		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);
+
+		ring->ready = true;
+
+		r = radeon_ring_test(rdev, ring->idx, ring);
+		if (r) {
+			ring->ready = false;
+			return r;
+		}
+	}
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	return 0;
+}
+
+/**
+ * cayman_dma_fini - tear down the async dma engines
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engines and free the rings (cayman-SI).
+ */
+void cayman_dma_fini(struct radeon_device *rdev)
+{
+	cayman_dma_stop(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
+}
+
+/**
+ * cayman_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up.
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 reset_mask = cayman_gpu_check_soft_reset(rdev);
+	u32 mask;
+
+	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+		mask = RADEON_RESET_DMA;
+	else
+		mask = RADEON_RESET_DMA1;
+
+	if (!(reset_mask & mask)) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
+/**
+ * cayman_dma_vm_set_page - update the page tables using the DMA
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ * @r600_flags: hw access flags 
+ *
+ * Update the page tables using the DMA (cayman/TN).
+ */
+void cayman_dma_vm_set_page(struct radeon_device *rdev,
+			    struct radeon_ib *ib,
+			    uint64_t pe,
+			    uint64_t addr, unsigned count,
+			    uint32_t incr, uint32_t flags)
+{
+	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+	uint64_t value;
+	unsigned ndw;
+
+	if ((flags & RADEON_VM_PAGE_SYSTEM) || (count == 1)) {
+		while (count) {
+			ndw = count * 2;
+			if (ndw > 0xFFFFE)
+				ndw = 0xFFFFE;
+
+			/* for non-physically contiguous pages (system) */
+			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, ndw);
+			ib->ptr[ib->length_dw++] = pe;
+			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+				if (flags & RADEON_VM_PAGE_SYSTEM) {
+					value = radeon_vm_map_gart(rdev, addr);
+					value &= 0xFFFFFFFFFFFFF000ULL;
+				} else if (flags & RADEON_VM_PAGE_VALID) {
+					value = addr;
+				} else {
+					value = 0;
+				}
+				addr += incr;
+				value |= r600_flags;
+				ib->ptr[ib->length_dw++] = value;
+				ib->ptr[ib->length_dw++] = upper_32_bits(value);
+			}
+		}
+	} else {
+		while (count) {
+			ndw = count * 2;
+			if (ndw > 0xFFFFE)
+				ndw = 0xFFFFE;
+
+			if (flags & RADEON_VM_PAGE_VALID)
+				value = addr;
+			else
+				value = 0;
+			/* for physically contiguous pages (vram) */
+			ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+			ib->ptr[ib->length_dw++] = pe; /* dst addr */
+			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+			ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+			ib->ptr[ib->length_dw++] = 0;
+			ib->ptr[ib->length_dw++] = value; /* value */
+			ib->ptr[ib->length_dw++] = upper_32_bits(value);
+			ib->ptr[ib->length_dw++] = incr; /* increment size */
+			ib->ptr[ib->length_dw++] = 0;
+			pe += ndw * 4;
+			addr += (ndw / 2) * incr;
+			count -= ndw / 2;
+		}
+	}
+	while (ib->length_dw & 0x7)
+		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0);
+}
+
+void cayman_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+	struct radeon_ring *ring = &rdev->ring[ridx];
+
+	if (vm == NULL)
+		return;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+	/* flush hdp cache */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+	radeon_ring_write(ring, 1);
+
+	/* bits 0-7 are the VM contexts0-7 */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+	radeon_ring_write(ring, 1 << vm->id);
+}
+
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index 559cf24..f7b625c 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -769,7 +769,8 @@
 {
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	u32 vblank_time = r600_dpm_get_vblank_time(rdev);
-	u32 switch_limit = pi->mem_gddr5 ? 450 : 300;
+	/* we never hit the non-gddr5 limit so disable it */
+	u32 switch_limit = pi->mem_gddr5 ? 450 : 0;
 
 	if (vblank_time < switch_limit)
 		return true;
@@ -1054,10 +1055,6 @@
 int ni_dpm_force_performance_level(struct radeon_device *rdev,
 				   enum radeon_dpm_forced_level level)
 {
-	struct radeon_ps *rps = rdev->pm.dpm.current_ps;
-	struct ni_ps *ps = ni_get_ps(rps);
-	u32 levels;
-
 	if (level == RADEON_DPM_FORCED_LEVEL_HIGH) {
 		if (ni_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, 0) != PPSMC_Result_OK)
 			return -EINVAL;
@@ -1068,8 +1065,7 @@
 		if (ni_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetForcedLevels, 0) != PPSMC_Result_OK)
 			return -EINVAL;
 
-		levels = ps->performance_level_count - 1;
-		if (ni_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, levels) != PPSMC_Result_OK)
+		if (ni_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, 1) != PPSMC_Result_OK)
 			return -EINVAL;
 	} else if (level == RADEON_DPM_FORCED_LEVEL_AUTO) {
 		if (ni_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetForcedLevels, 0) != PPSMC_Result_OK)
@@ -4042,6 +4038,7 @@
 			 (power_state->v1.ucNonClockStateIndex *
 			  power_info->pplib.ucNonClockSize));
 		if (power_info->pplib.ucStateEntrySize - 1) {
+			u8 *idx;
 			ps = kzalloc(sizeof(struct ni_ps), GFP_KERNEL);
 			if (ps == NULL) {
 				kfree(rdev->pm.dpm.ps);
@@ -4051,12 +4048,12 @@
 			ni_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i],
 							 non_clock_info,
 							 power_info->pplib.ucNonClockSize);
+			idx = (u8 *)&power_state->v1.ucClockStateIndices[0];
 			for (j = 0; j < (power_info->pplib.ucStateEntrySize - 1); j++) {
 				clock_info = (union pplib_clock_info *)
 					(mode_info->atom_context->bios + data_offset +
 					 le16_to_cpu(power_info->pplib.usClockInfoArrayOffset) +
-					 (power_state->v1.ucClockStateIndices[j] *
-					  power_info->pplib.ucClockInfoSize));
+					 (idx[j] * power_info->pplib.ucClockInfoSize));
 				ni_parse_pplib_clock_info(rdev,
 							  &rdev->pm.dpm.ps[i], j,
 							  clock_info);
@@ -4072,9 +4069,6 @@
 	struct rv7xx_power_info *pi;
 	struct evergreen_power_info *eg_pi;
 	struct ni_power_info *ni_pi;
-	int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
-	u16 data_offset, size;
-	u8 frev, crev;
 	struct atom_clock_dividers dividers;
 	int ret;
 
@@ -4167,16 +4161,7 @@
 	eg_pi->vddci_control =
 		radeon_atom_is_voltage_gpio(rdev, SET_VOLTAGE_TYPE_ASIC_VDDCI, 0);
 
-	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
-                                   &frev, &crev, &data_offset)) {
-		pi->sclk_ss = true;
-		pi->mclk_ss = true;
-		pi->dynamic_ss = true;
-	} else {
-		pi->sclk_ss = false;
-		pi->mclk_ss = false;
-		pi->dynamic_ss = true;
-	}
+	rv770_get_engine_memory_ss(rdev);
 
 	pi->asi = RV770_ASI_DFLT;
 	pi->pasi = CYPRESS_HASI_DFLT;
@@ -4193,8 +4178,7 @@
 
 	pi->dynamic_pcie_gen2 = true;
 
-	if (pi->gfx_clock_gating &&
-	    (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE))
+	if (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE)
 		pi->thermal_protection = true;
 	else
 		pi->thermal_protection = false;
@@ -4288,6 +4272,12 @@
 
 	ni_pi->use_power_boost_limit = true;
 
+	/* make sure dc limits are valid */
+	if ((rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) ||
+	    (rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0))
+		rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc =
+			rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/ppsmc.h b/drivers/gpu/drm/radeon/ppsmc.h
index b5564a3..6828428 100644
--- a/drivers/gpu/drm/radeon/ppsmc.h
+++ b/drivers/gpu/drm/radeon/ppsmc.h
@@ -99,11 +99,68 @@
 #define PPSMC_MSG_ThrottleOVRDSCLKDS        ((uint8_t)0x96)
 #define PPSMC_MSG_CancelThrottleOVRDSCLKDS  ((uint8_t)0x97)
 
+/* CI/KV/KB */
+#define PPSMC_MSG_UVDDPM_SetEnabledMask       ((uint16_t) 0x12D)
+#define PPSMC_MSG_VCEDPM_SetEnabledMask       ((uint16_t) 0x12E)
+#define PPSMC_MSG_ACPDPM_SetEnabledMask       ((uint16_t) 0x12F)
+#define PPSMC_MSG_SAMUDPM_SetEnabledMask      ((uint16_t) 0x130)
+#define PPSMC_MSG_MCLKDPM_ForceState          ((uint16_t) 0x131)
+#define PPSMC_MSG_MCLKDPM_NoForcedLevel       ((uint16_t) 0x132)
+#define PPSMC_MSG_Voltage_Cntl_Disable        ((uint16_t) 0x135)
+#define PPSMC_MSG_PCIeDPM_Enable              ((uint16_t) 0x136)
+#define PPSMC_MSG_PCIeDPM_Disable             ((uint16_t) 0x13d)
+#define PPSMC_MSG_ACPPowerOFF                 ((uint16_t) 0x137)
+#define PPSMC_MSG_ACPPowerON                  ((uint16_t) 0x138)
+#define PPSMC_MSG_SAMPowerOFF                 ((uint16_t) 0x139)
+#define PPSMC_MSG_SAMPowerON                  ((uint16_t) 0x13a)
+#define PPSMC_MSG_PCIeDPM_Disable             ((uint16_t) 0x13d)
+#define PPSMC_MSG_NBDPM_Enable                ((uint16_t) 0x140)
+#define PPSMC_MSG_NBDPM_Disable               ((uint16_t) 0x141)
+#define PPSMC_MSG_SCLKDPM_SetEnabledMask      ((uint16_t) 0x145)
+#define PPSMC_MSG_MCLKDPM_SetEnabledMask      ((uint16_t) 0x146)
+#define PPSMC_MSG_PCIeDPM_ForceLevel          ((uint16_t) 0x147)
+#define PPSMC_MSG_PCIeDPM_UnForceLevel        ((uint16_t) 0x148)
+#define PPSMC_MSG_EnableVRHotGPIOInterrupt    ((uint16_t) 0x14a)
+#define PPSMC_MSG_DPM_Enable                  ((uint16_t) 0x14e)
+#define PPSMC_MSG_DPM_Disable                 ((uint16_t) 0x14f)
+#define PPSMC_MSG_MCLKDPM_Enable              ((uint16_t) 0x150)
+#define PPSMC_MSG_MCLKDPM_Disable             ((uint16_t) 0x151)
+#define PPSMC_MSG_UVDDPM_Enable               ((uint16_t) 0x154)
+#define PPSMC_MSG_UVDDPM_Disable              ((uint16_t) 0x155)
+#define PPSMC_MSG_SAMUDPM_Enable              ((uint16_t) 0x156)
+#define PPSMC_MSG_SAMUDPM_Disable             ((uint16_t) 0x157)
+#define PPSMC_MSG_ACPDPM_Enable               ((uint16_t) 0x158)
+#define PPSMC_MSG_ACPDPM_Disable              ((uint16_t) 0x159)
+#define PPSMC_MSG_VCEDPM_Enable               ((uint16_t) 0x15a)
+#define PPSMC_MSG_VCEDPM_Disable              ((uint16_t) 0x15b)
+#define PPSMC_MSG_VddC_Request                ((uint16_t) 0x15f)
+#define PPSMC_MSG_SCLKDPM_GetEnabledMask      ((uint16_t) 0x162)
+#define PPSMC_MSG_PCIeDPM_SetEnabledMask      ((uint16_t) 0x167)
+#define PPSMC_MSG_TDCLimitEnable              ((uint16_t) 0x169)
+#define PPSMC_MSG_TDCLimitDisable             ((uint16_t) 0x16a)
+#define PPSMC_MSG_PkgPwrLimitEnable           ((uint16_t) 0x185)
+#define PPSMC_MSG_PkgPwrLimitDisable          ((uint16_t) 0x186)
+#define PPSMC_MSG_PkgPwrSetLimit              ((uint16_t) 0x187)
+#define PPSMC_MSG_OverDriveSetTargetTdp       ((uint16_t) 0x188)
+#define PPSMC_MSG_SCLKDPM_FreezeLevel         ((uint16_t) 0x189)
+#define PPSMC_MSG_SCLKDPM_UnfreezeLevel       ((uint16_t) 0x18A)
+#define PPSMC_MSG_MCLKDPM_FreezeLevel         ((uint16_t) 0x18B)
+#define PPSMC_MSG_MCLKDPM_UnfreezeLevel       ((uint16_t) 0x18C)
+#define PPSMC_MSG_MASTER_DeepSleep_ON         ((uint16_t) 0x18F)
+#define PPSMC_MSG_MASTER_DeepSleep_OFF        ((uint16_t) 0x190)
+#define PPSMC_MSG_Remove_DC_Clamp             ((uint16_t) 0x191)
+
+#define PPSMC_MSG_API_GetSclkFrequency        ((uint16_t) 0x200)
+#define PPSMC_MSG_API_GetMclkFrequency        ((uint16_t) 0x201)
+
 /* TN */
 #define PPSMC_MSG_DPM_Config                ((uint32_t) 0x102)
 #define PPSMC_MSG_DPM_ForceState            ((uint32_t) 0x104)
 #define PPSMC_MSG_PG_SIMD_Config            ((uint32_t) 0x108)
 #define PPSMC_MSG_DPM_N_LevelsDisabled      ((uint32_t) 0x112)
+#define PPSMC_MSG_Voltage_Cntl_Enable       ((uint32_t) 0x109)
+#define PPSMC_MSG_VCEPowerOFF               ((uint32_t) 0x10e)
+#define PPSMC_MSG_VCEPowerON                ((uint32_t) 0x10f)
 #define PPSMC_MSG_DCE_RemoveVoltageAdjustment   ((uint32_t) 0x11d)
 #define PPSMC_MSG_DCE_AllowVoltageAdjustment    ((uint32_t) 0x11e)
 #define PPSMC_MSG_UVD_DPM_Config            ((uint32_t) 0x124)
diff --git a/drivers/gpu/drm/radeon/pptable.h b/drivers/gpu/drm/radeon/pptable.h
new file mode 100644
index 0000000..da43ab3
--- /dev/null
+++ b/drivers/gpu/drm/radeon/pptable.h
@@ -0,0 +1,682 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _PPTABLE_H
+#define _PPTABLE_H
+
+#pragma pack(push, 1)
+
+typedef struct _ATOM_PPLIB_THERMALCONTROLLER
+
+{
+    UCHAR ucType;           // one of ATOM_PP_THERMALCONTROLLER_*
+    UCHAR ucI2cLine;        // as interpreted by DAL I2C
+    UCHAR ucI2cAddress;
+    UCHAR ucFanParameters;  // Fan Control Parameters.
+    UCHAR ucFanMinRPM;      // Fan Minimum RPM (hundreds) -- for display purposes only.
+    UCHAR ucFanMaxRPM;      // Fan Maximum RPM (hundreds) -- for display purposes only.
+    UCHAR ucReserved;       // ----
+    UCHAR ucFlags;          // to be defined
+} ATOM_PPLIB_THERMALCONTROLLER;
+
+#define ATOM_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK 0x0f
+#define ATOM_PP_FANPARAMETERS_NOFAN                                 0x80    // No fan is connected to this controller.
+
+#define ATOM_PP_THERMALCONTROLLER_NONE      0
+#define ATOM_PP_THERMALCONTROLLER_LM63      1  // Not used by PPLib
+#define ATOM_PP_THERMALCONTROLLER_ADM1032   2  // Not used by PPLib
+#define ATOM_PP_THERMALCONTROLLER_ADM1030   3  // Not used by PPLib
+#define ATOM_PP_THERMALCONTROLLER_MUA6649   4  // Not used by PPLib
+#define ATOM_PP_THERMALCONTROLLER_LM64      5
+#define ATOM_PP_THERMALCONTROLLER_F75375    6  // Not used by PPLib
+#define ATOM_PP_THERMALCONTROLLER_RV6xx     7
+#define ATOM_PP_THERMALCONTROLLER_RV770     8
+#define ATOM_PP_THERMALCONTROLLER_ADT7473   9
+#define ATOM_PP_THERMALCONTROLLER_KONG      10
+#define ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO     11
+#define ATOM_PP_THERMALCONTROLLER_EVERGREEN 12
+#define ATOM_PP_THERMALCONTROLLER_EMC2103   13  /* 0x0D */ // Only fan control will be implemented, do NOT show this in PPGen.
+#define ATOM_PP_THERMALCONTROLLER_SUMO      14  /* 0x0E */ // Sumo type, used internally
+#define ATOM_PP_THERMALCONTROLLER_NISLANDS  15
+#define ATOM_PP_THERMALCONTROLLER_SISLANDS  16
+#define ATOM_PP_THERMALCONTROLLER_LM96163   17
+#define ATOM_PP_THERMALCONTROLLER_CISLANDS  18
+#define ATOM_PP_THERMALCONTROLLER_KAVERI    19
+
+
+// Thermal controller 'combo type' to use an external controller for Fan control and an internal controller for thermal.
+// We probably should reserve the bit 0x80 for this use.
+// To keep the number of these types low we should also use the same code for all ASICs (i.e. do not distinguish RV6xx and RV7xx Internal here).
+// The driver can pick the correct internal controller based on the ASIC.
+
+#define ATOM_PP_THERMALCONTROLLER_ADT7473_WITH_INTERNAL   0x89    // ADT7473 Fan Control + Internal Thermal Controller
+#define ATOM_PP_THERMALCONTROLLER_EMC2103_WITH_INTERNAL   0x8D    // EMC2103 Fan Control + Internal Thermal Controller
+
+typedef struct _ATOM_PPLIB_STATE
+{
+    UCHAR ucNonClockStateIndex;
+    UCHAR ucClockStateIndices[1]; // variable-sized
+} ATOM_PPLIB_STATE;
+
+
+typedef struct _ATOM_PPLIB_FANTABLE
+{
+    UCHAR   ucFanTableFormat;                // Change this if the table format changes or version changes so that the other fields are not the same.
+    UCHAR   ucTHyst;                         // Temperature hysteresis. Integer.
+    USHORT  usTMin;                          // The temperature, in 0.01 centigrades, below which we just run at a minimal PWM.
+    USHORT  usTMed;                          // The middle temperature where we change slopes.
+    USHORT  usTHigh;                         // The high point above TMed for adjusting the second slope.
+    USHORT  usPWMMin;                        // The minimum PWM value in percent (0.01% increments).
+    USHORT  usPWMMed;                        // The PWM value (in percent) at TMed.
+    USHORT  usPWMHigh;                       // The PWM value at THigh.
+} ATOM_PPLIB_FANTABLE;
+
+typedef struct _ATOM_PPLIB_FANTABLE2
+{
+    ATOM_PPLIB_FANTABLE basicTable;
+    USHORT  usTMax;                          // The max temperature
+} ATOM_PPLIB_FANTABLE2;
+
+typedef struct _ATOM_PPLIB_EXTENDEDHEADER
+{
+    USHORT  usSize;
+    ULONG   ulMaxEngineClock;   // For Overdrive.
+    ULONG   ulMaxMemoryClock;   // For Overdrive.
+    // Add extra system parameters here, always adjust size to include all fields.
+    USHORT  usVCETableOffset; //points to ATOM_PPLIB_VCE_Table
+    USHORT  usUVDTableOffset;   //points to ATOM_PPLIB_UVD_Table
+    USHORT  usSAMUTableOffset;  //points to ATOM_PPLIB_SAMU_Table
+    USHORT  usPPMTableOffset;   //points to ATOM_PPLIB_PPM_Table
+    USHORT  usACPTableOffset;  //points to ATOM_PPLIB_ACP_Table   
+    USHORT  usPowerTuneTableOffset; //points to ATOM_PPLIB_POWERTUNE_Table   
+} ATOM_PPLIB_EXTENDEDHEADER;
+
+//// ATOM_PPLIB_POWERPLAYTABLE::ulPlatformCaps
+#define ATOM_PP_PLATFORM_CAP_BACKBIAS 1
+#define ATOM_PP_PLATFORM_CAP_POWERPLAY 2
+#define ATOM_PP_PLATFORM_CAP_SBIOSPOWERSOURCE 4
+#define ATOM_PP_PLATFORM_CAP_ASPM_L0s 8
+#define ATOM_PP_PLATFORM_CAP_ASPM_L1 16
+#define ATOM_PP_PLATFORM_CAP_HARDWAREDC 32
+#define ATOM_PP_PLATFORM_CAP_GEMINIPRIMARY 64
+#define ATOM_PP_PLATFORM_CAP_STEPVDDC 128
+#define ATOM_PP_PLATFORM_CAP_VOLTAGECONTROL 256
+#define ATOM_PP_PLATFORM_CAP_SIDEPORTCONTROL 512
+#define ATOM_PP_PLATFORM_CAP_TURNOFFPLL_ASPML1 1024
+#define ATOM_PP_PLATFORM_CAP_HTLINKCONTROL 2048
+#define ATOM_PP_PLATFORM_CAP_MVDDCONTROL 4096
+#define ATOM_PP_PLATFORM_CAP_GOTO_BOOT_ON_ALERT 0x2000              // Go to boot state on alerts, e.g. on an AC->DC transition.
+#define ATOM_PP_PLATFORM_CAP_DONT_WAIT_FOR_VBLANK_ON_ALERT 0x4000   // Do NOT wait for VBLANK during an alert (e.g. AC->DC transition).
+#define ATOM_PP_PLATFORM_CAP_VDDCI_CONTROL 0x8000                   // Does the driver control VDDCI independently from VDDC.
+#define ATOM_PP_PLATFORM_CAP_REGULATOR_HOT 0x00010000               // Enable the 'regulator hot' feature.
+#define ATOM_PP_PLATFORM_CAP_BACO          0x00020000               // Does the driver supports BACO state.
+#define ATOM_PP_PLATFORM_CAP_NEW_CAC_VOLTAGE   0x00040000           // Does the driver supports new CAC voltage table.
+#define ATOM_PP_PLATFORM_CAP_REVERT_GPIO5_POLARITY   0x00080000     // Does the driver supports revert GPIO5 polarity.
+#define ATOM_PP_PLATFORM_CAP_OUTPUT_THERMAL2GPIO17   0x00100000     // Does the driver supports thermal2GPIO17.
+#define ATOM_PP_PLATFORM_CAP_VRHOT_GPIO_CONFIGURABLE   0x00200000   // Does the driver supports VR HOT GPIO Configurable.
+#define ATOM_PP_PLATFORM_CAP_TEMP_INVERSION   0x00400000            // Does the driver supports Temp Inversion feature.
+#define ATOM_PP_PLATFORM_CAP_EVV    0x00800000
+
+typedef struct _ATOM_PPLIB_POWERPLAYTABLE
+{
+      ATOM_COMMON_TABLE_HEADER sHeader;
+
+      UCHAR ucDataRevision;
+
+      UCHAR ucNumStates;
+      UCHAR ucStateEntrySize;
+      UCHAR ucClockInfoSize;
+      UCHAR ucNonClockSize;
+
+      // offset from start of this table to array of ucNumStates ATOM_PPLIB_STATE structures
+      USHORT usStateArrayOffset;
+
+      // offset from start of this table to array of ASIC-specific structures,
+      // currently ATOM_PPLIB_CLOCK_INFO.
+      USHORT usClockInfoArrayOffset;
+
+      // offset from start of this table to array of ATOM_PPLIB_NONCLOCK_INFO
+      USHORT usNonClockInfoArrayOffset;
+
+      USHORT usBackbiasTime;    // in microseconds
+      USHORT usVoltageTime;     // in microseconds
+      USHORT usTableSize;       //the size of this structure, or the extended structure
+
+      ULONG ulPlatformCaps;            // See ATOM_PPLIB_CAPS_*
+
+      ATOM_PPLIB_THERMALCONTROLLER    sThermalController;
+
+      USHORT usBootClockInfoOffset;
+      USHORT usBootNonClockInfoOffset;
+
+} ATOM_PPLIB_POWERPLAYTABLE;
+
+typedef struct _ATOM_PPLIB_POWERPLAYTABLE2
+{
+    ATOM_PPLIB_POWERPLAYTABLE basicTable;
+    UCHAR   ucNumCustomThermalPolicy;
+    USHORT  usCustomThermalPolicyArrayOffset;
+}ATOM_PPLIB_POWERPLAYTABLE2, *LPATOM_PPLIB_POWERPLAYTABLE2;
+
+typedef struct _ATOM_PPLIB_POWERPLAYTABLE3
+{
+    ATOM_PPLIB_POWERPLAYTABLE2 basicTable2;
+    USHORT                     usFormatID;                      // To be used ONLY by PPGen.
+    USHORT                     usFanTableOffset;
+    USHORT                     usExtendendedHeaderOffset;
+} ATOM_PPLIB_POWERPLAYTABLE3, *LPATOM_PPLIB_POWERPLAYTABLE3;
+
+typedef struct _ATOM_PPLIB_POWERPLAYTABLE4
+{
+    ATOM_PPLIB_POWERPLAYTABLE3 basicTable3;
+    ULONG                      ulGoldenPPID;                    // PPGen use only     
+    ULONG                      ulGoldenRevision;                // PPGen use only
+    USHORT                     usVddcDependencyOnSCLKOffset;
+    USHORT                     usVddciDependencyOnMCLKOffset;
+    USHORT                     usVddcDependencyOnMCLKOffset;
+    USHORT                     usMaxClockVoltageOnDCOffset;
+    USHORT                     usVddcPhaseShedLimitsTableOffset;    // Points to ATOM_PPLIB_PhaseSheddingLimits_Table
+    USHORT                     usMvddDependencyOnMCLKOffset;  
+} ATOM_PPLIB_POWERPLAYTABLE4, *LPATOM_PPLIB_POWERPLAYTABLE4;
+
+typedef struct _ATOM_PPLIB_POWERPLAYTABLE5
+{
+    ATOM_PPLIB_POWERPLAYTABLE4 basicTable4;
+    ULONG                      ulTDPLimit;
+    ULONG                      ulNearTDPLimit;
+    ULONG                      ulSQRampingThreshold;
+    USHORT                     usCACLeakageTableOffset;         // Points to ATOM_PPLIB_CAC_Leakage_Table
+    ULONG                      ulCACLeakage;                    // The iLeakage for driver calculated CAC leakage table
+    USHORT                     usTDPODLimit;
+    USHORT                     usLoadLineSlope;                 // in milliOhms * 100
+} ATOM_PPLIB_POWERPLAYTABLE5, *LPATOM_PPLIB_POWERPLAYTABLE5;
+
+//// ATOM_PPLIB_NONCLOCK_INFO::usClassification
+#define ATOM_PPLIB_CLASSIFICATION_UI_MASK          0x0007
+#define ATOM_PPLIB_CLASSIFICATION_UI_SHIFT         0
+#define ATOM_PPLIB_CLASSIFICATION_UI_NONE          0
+#define ATOM_PPLIB_CLASSIFICATION_UI_BATTERY       1
+#define ATOM_PPLIB_CLASSIFICATION_UI_BALANCED      3
+#define ATOM_PPLIB_CLASSIFICATION_UI_PERFORMANCE   5
+// 2, 4, 6, 7 are reserved
+
+#define ATOM_PPLIB_CLASSIFICATION_BOOT                   0x0008
+#define ATOM_PPLIB_CLASSIFICATION_THERMAL                0x0010
+#define ATOM_PPLIB_CLASSIFICATION_LIMITEDPOWERSOURCE     0x0020
+#define ATOM_PPLIB_CLASSIFICATION_REST                   0x0040
+#define ATOM_PPLIB_CLASSIFICATION_FORCED                 0x0080
+#define ATOM_PPLIB_CLASSIFICATION_3DPERFORMANCE          0x0100
+#define ATOM_PPLIB_CLASSIFICATION_OVERDRIVETEMPLATE      0x0200
+#define ATOM_PPLIB_CLASSIFICATION_UVDSTATE               0x0400
+#define ATOM_PPLIB_CLASSIFICATION_3DLOW                  0x0800
+#define ATOM_PPLIB_CLASSIFICATION_ACPI                   0x1000
+#define ATOM_PPLIB_CLASSIFICATION_HD2STATE               0x2000
+#define ATOM_PPLIB_CLASSIFICATION_HDSTATE                0x4000
+#define ATOM_PPLIB_CLASSIFICATION_SDSTATE                0x8000
+
+//// ATOM_PPLIB_NONCLOCK_INFO::usClassification2
+#define ATOM_PPLIB_CLASSIFICATION2_LIMITEDPOWERSOURCE_2     0x0001
+#define ATOM_PPLIB_CLASSIFICATION2_ULV                      0x0002
+#define ATOM_PPLIB_CLASSIFICATION2_MVC                      0x0004   //Multi-View Codec (BD-3D)
+
+//// ATOM_PPLIB_NONCLOCK_INFO::ulCapsAndSettings
+#define ATOM_PPLIB_SINGLE_DISPLAY_ONLY           0x00000001
+#define ATOM_PPLIB_SUPPORTS_VIDEO_PLAYBACK         0x00000002
+
+// 0 is 2.5Gb/s, 1 is 5Gb/s
+#define ATOM_PPLIB_PCIE_LINK_SPEED_MASK            0x00000004
+#define ATOM_PPLIB_PCIE_LINK_SPEED_SHIFT           2
+
+// lanes - 1: 1, 2, 4, 8, 12, 16 permitted by PCIE spec
+#define ATOM_PPLIB_PCIE_LINK_WIDTH_MASK            0x000000F8
+#define ATOM_PPLIB_PCIE_LINK_WIDTH_SHIFT           3
+
+// lookup into reduced refresh-rate table
+#define ATOM_PPLIB_LIMITED_REFRESHRATE_VALUE_MASK  0x00000F00
+#define ATOM_PPLIB_LIMITED_REFRESHRATE_VALUE_SHIFT 8
+
+#define ATOM_PPLIB_LIMITED_REFRESHRATE_UNLIMITED    0
+#define ATOM_PPLIB_LIMITED_REFRESHRATE_50HZ         1
+// 2-15 TBD as needed.
+
+#define ATOM_PPLIB_SOFTWARE_DISABLE_LOADBALANCING        0x00001000
+#define ATOM_PPLIB_SOFTWARE_ENABLE_SLEEP_FOR_TIMESTAMPS  0x00002000
+
+#define ATOM_PPLIB_DISALLOW_ON_DC                       0x00004000
+
+#define ATOM_PPLIB_ENABLE_VARIBRIGHT                     0x00008000
+
+//memory related flags
+#define ATOM_PPLIB_SWSTATE_MEMORY_DLL_OFF               0x000010000
+
+//M3 Arb    //2bits, current 3 sets of parameters in total
+#define ATOM_PPLIB_M3ARB_MASK                       0x00060000
+#define ATOM_PPLIB_M3ARB_SHIFT                      17
+
+#define ATOM_PPLIB_ENABLE_DRR                       0x00080000
+
+// remaining 16 bits are reserved
+typedef struct _ATOM_PPLIB_THERMAL_STATE
+{
+    UCHAR   ucMinTemperature;
+    UCHAR   ucMaxTemperature;
+    UCHAR   ucThermalAction;
+}ATOM_PPLIB_THERMAL_STATE, *LPATOM_PPLIB_THERMAL_STATE;
+
+// Contained in an array starting at the offset
+// in ATOM_PPLIB_POWERPLAYTABLE::usNonClockInfoArrayOffset.
+// referenced from ATOM_PPLIB_STATE_INFO::ucNonClockStateIndex
+#define ATOM_PPLIB_NONCLOCKINFO_VER1      12
+#define ATOM_PPLIB_NONCLOCKINFO_VER2      24
+typedef struct _ATOM_PPLIB_NONCLOCK_INFO
+{
+      USHORT usClassification;
+      UCHAR  ucMinTemperature;
+      UCHAR  ucMaxTemperature;
+      ULONG  ulCapsAndSettings;
+      UCHAR  ucRequiredPower;
+      USHORT usClassification2;
+      ULONG  ulVCLK;
+      ULONG  ulDCLK;
+      UCHAR  ucUnused[5];
+} ATOM_PPLIB_NONCLOCK_INFO;
+
+// Contained in an array starting at the offset
+// in ATOM_PPLIB_POWERPLAYTABLE::usClockInfoArrayOffset.
+// referenced from ATOM_PPLIB_STATE::ucClockStateIndices
+typedef struct _ATOM_PPLIB_R600_CLOCK_INFO
+{
+      USHORT usEngineClockLow;
+      UCHAR ucEngineClockHigh;
+
+      USHORT usMemoryClockLow;
+      UCHAR ucMemoryClockHigh;
+
+      USHORT usVDDC;
+      USHORT usUnused1;
+      USHORT usUnused2;
+
+      ULONG ulFlags; // ATOM_PPLIB_R600_FLAGS_*
+
+} ATOM_PPLIB_R600_CLOCK_INFO;
+
+// ulFlags in ATOM_PPLIB_R600_CLOCK_INFO
+#define ATOM_PPLIB_R600_FLAGS_PCIEGEN2          1
+#define ATOM_PPLIB_R600_FLAGS_UVDSAFE           2
+#define ATOM_PPLIB_R600_FLAGS_BACKBIASENABLE    4
+#define ATOM_PPLIB_R600_FLAGS_MEMORY_ODT_OFF    8
+#define ATOM_PPLIB_R600_FLAGS_MEMORY_DLL_OFF   16
+#define ATOM_PPLIB_R600_FLAGS_LOWPOWER         32   // On the RV770 use 'low power' setting (sequencer S0).
+
+typedef struct _ATOM_PPLIB_RS780_CLOCK_INFO
+
+{
+      USHORT usLowEngineClockLow;         // Low Engine clock in MHz (the same way as on the R600).
+      UCHAR  ucLowEngineClockHigh;
+      USHORT usHighEngineClockLow;        // High Engine clock in MHz.
+      UCHAR  ucHighEngineClockHigh;
+      USHORT usMemoryClockLow;            // For now one of the ATOM_PPLIB_RS780_SPMCLK_XXXX constants.
+      UCHAR  ucMemoryClockHigh;           // Currentyl unused.
+      UCHAR  ucPadding;                   // For proper alignment and size.
+      USHORT usVDDC;                      // For the 780, use: None, Low, High, Variable
+      UCHAR  ucMaxHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}
+      UCHAR  ucMinHTLinkWidth;            // From SBIOS - {2, 4, 8, 16}. Effective only if CDLW enabled. Minimum down stream width could 
+      USHORT usHTLinkFreq;                // See definition ATOM_PPLIB_RS780_HTLINKFREQ_xxx or in MHz(>=200).
+      ULONG  ulFlags; 
+} ATOM_PPLIB_RS780_CLOCK_INFO;
+
+#define ATOM_PPLIB_RS780_VOLTAGE_NONE       0 
+#define ATOM_PPLIB_RS780_VOLTAGE_LOW        1 
+#define ATOM_PPLIB_RS780_VOLTAGE_HIGH       2 
+#define ATOM_PPLIB_RS780_VOLTAGE_VARIABLE   3 
+
+#define ATOM_PPLIB_RS780_SPMCLK_NONE        0   // We cannot change the side port memory clock, leave it as it is.
+#define ATOM_PPLIB_RS780_SPMCLK_LOW         1
+#define ATOM_PPLIB_RS780_SPMCLK_HIGH        2
+
+#define ATOM_PPLIB_RS780_HTLINKFREQ_NONE       0 
+#define ATOM_PPLIB_RS780_HTLINKFREQ_LOW        1 
+#define ATOM_PPLIB_RS780_HTLINKFREQ_HIGH       2 
+
+typedef struct _ATOM_PPLIB_EVERGREEN_CLOCK_INFO
+{
+      USHORT usEngineClockLow;
+      UCHAR  ucEngineClockHigh;
+
+      USHORT usMemoryClockLow;
+      UCHAR  ucMemoryClockHigh;
+
+      USHORT usVDDC;
+      USHORT usVDDCI;
+      USHORT usUnused;
+
+      ULONG ulFlags; // ATOM_PPLIB_R600_FLAGS_*
+
+} ATOM_PPLIB_EVERGREEN_CLOCK_INFO;
+
+typedef struct _ATOM_PPLIB_SI_CLOCK_INFO
+{
+      USHORT usEngineClockLow;
+      UCHAR  ucEngineClockHigh;
+
+      USHORT usMemoryClockLow;
+      UCHAR  ucMemoryClockHigh;
+
+      USHORT usVDDC;
+      USHORT usVDDCI;
+      UCHAR  ucPCIEGen;
+      UCHAR  ucUnused1;
+
+      ULONG ulFlags; // ATOM_PPLIB_SI_FLAGS_*, no flag is necessary for now
+
+} ATOM_PPLIB_SI_CLOCK_INFO;
+
+typedef struct _ATOM_PPLIB_CI_CLOCK_INFO
+{
+      USHORT usEngineClockLow;
+      UCHAR  ucEngineClockHigh;
+
+      USHORT usMemoryClockLow;
+      UCHAR  ucMemoryClockHigh;
+      
+      UCHAR  ucPCIEGen;
+      USHORT usPCIELane;
+} ATOM_PPLIB_CI_CLOCK_INFO;
+
+typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{
+      USHORT usEngineClockLow;  //clockfrequency & 0xFFFF. The unit is in 10khz
+      UCHAR  ucEngineClockHigh; //clockfrequency >> 16. 
+      UCHAR  vddcIndex;         //2-bit vddc index;
+      USHORT tdpLimit;
+      //please initalize to 0
+      USHORT rsv1;
+      //please initialize to 0s
+      ULONG rsv2[2];
+}ATOM_PPLIB_SUMO_CLOCK_INFO;
+
+typedef struct _ATOM_PPLIB_STATE_V2
+{
+      //number of valid dpm levels in this state; Driver uses it to calculate the whole 
+      //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR)
+      UCHAR ucNumDPMLevels;
+      
+      //a index to the array of nonClockInfos
+      UCHAR nonClockInfoIndex;
+      /**
+      * Driver will read the first ucNumDPMLevels in this array
+      */
+      UCHAR clockInfoIndex[1];
+} ATOM_PPLIB_STATE_V2;
+
+typedef struct _StateArray{
+    //how many states we have 
+    UCHAR ucNumEntries;
+    
+    ATOM_PPLIB_STATE_V2 states[1];
+}StateArray;
+
+
+typedef struct _ClockInfoArray{
+    //how many clock levels we have
+    UCHAR ucNumEntries;
+    
+    //sizeof(ATOM_PPLIB_CLOCK_INFO)
+    UCHAR ucEntrySize;
+    
+    UCHAR clockInfo[1];
+}ClockInfoArray;
+
+typedef struct _NonClockInfoArray{
+
+    //how many non-clock levels we have. normally should be same as number of states
+    UCHAR ucNumEntries;
+    //sizeof(ATOM_PPLIB_NONCLOCK_INFO)
+    UCHAR ucEntrySize;
+    
+    ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1];
+}NonClockInfoArray;
+
+typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
+{
+    USHORT usClockLow;
+    UCHAR  ucClockHigh;
+    USHORT usVoltage;
+}ATOM_PPLIB_Clock_Voltage_Dependency_Record;
+
+typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Table
+{
+    UCHAR ucNumEntries;                                                // Number of entries.
+    ATOM_PPLIB_Clock_Voltage_Dependency_Record entries[1];             // Dynamically allocate entries.
+}ATOM_PPLIB_Clock_Voltage_Dependency_Table;
+
+typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Record
+{
+    USHORT usSclkLow;
+    UCHAR  ucSclkHigh;
+    USHORT usMclkLow;
+    UCHAR  ucMclkHigh;
+    USHORT usVddc;
+    USHORT usVddci;
+}ATOM_PPLIB_Clock_Voltage_Limit_Record;
+
+typedef struct _ATOM_PPLIB_Clock_Voltage_Limit_Table
+{
+    UCHAR ucNumEntries;                                                // Number of entries.
+    ATOM_PPLIB_Clock_Voltage_Limit_Record entries[1];                  // Dynamically allocate entries.
+}ATOM_PPLIB_Clock_Voltage_Limit_Table;
+
+union _ATOM_PPLIB_CAC_Leakage_Record
+{
+    struct
+    {
+        USHORT usVddc;          // We use this field for the "fake" standardized VDDC for power calculations; For CI and newer, we use this as the real VDDC value. in CI we read it as StdVoltageHiSidd
+        ULONG  ulLeakageValue;  // For CI and newer we use this as the "fake" standar VDDC value. in CI we read it as StdVoltageLoSidd
+
+    };
+    struct
+     {
+        USHORT usVddc1;
+        USHORT usVddc2;
+        USHORT usVddc3;
+     };
+};
+
+typedef union _ATOM_PPLIB_CAC_Leakage_Record ATOM_PPLIB_CAC_Leakage_Record;
+
+typedef struct _ATOM_PPLIB_CAC_Leakage_Table
+{
+    UCHAR ucNumEntries;                                                 // Number of entries.
+    ATOM_PPLIB_CAC_Leakage_Record entries[1];                           // Dynamically allocate entries.
+}ATOM_PPLIB_CAC_Leakage_Table;
+
+typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Record
+{
+    USHORT usVoltage;
+    USHORT usSclkLow;
+    UCHAR  ucSclkHigh;
+    USHORT usMclkLow;
+    UCHAR  ucMclkHigh;
+}ATOM_PPLIB_PhaseSheddingLimits_Record;
+
+typedef struct _ATOM_PPLIB_PhaseSheddingLimits_Table
+{
+    UCHAR ucNumEntries;                                                 // Number of entries.
+    ATOM_PPLIB_PhaseSheddingLimits_Record entries[1];                   // Dynamically allocate entries.
+}ATOM_PPLIB_PhaseSheddingLimits_Table;
+
+typedef struct _VCEClockInfo{
+    USHORT usEVClkLow;
+    UCHAR  ucEVClkHigh;
+    USHORT usECClkLow;
+    UCHAR  ucECClkHigh;
+}VCEClockInfo;
+
+typedef struct _VCEClockInfoArray{
+    UCHAR ucNumEntries;
+    VCEClockInfo entries[1];
+}VCEClockInfoArray;
+
+typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record
+{
+    USHORT usVoltage;
+    UCHAR  ucVCEClockInfoIndex;
+}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record;
+
+typedef struct _ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table
+{
+    UCHAR numEntries;
+    ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record entries[1];
+}ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table;
+
+typedef struct _ATOM_PPLIB_VCE_State_Record
+{
+    UCHAR  ucVCEClockInfoIndex;
+    UCHAR  ucClockInfoIndex; //highest 2 bits indicates memory p-states, lower 6bits indicates index to ClockInfoArrary
+}ATOM_PPLIB_VCE_State_Record;
+
+typedef struct _ATOM_PPLIB_VCE_State_Table
+{
+    UCHAR numEntries;
+    ATOM_PPLIB_VCE_State_Record entries[1];
+}ATOM_PPLIB_VCE_State_Table;
+
+
+typedef struct _ATOM_PPLIB_VCE_Table
+{
+      UCHAR revid;
+//    VCEClockInfoArray array;
+//    ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table limits;
+//    ATOM_PPLIB_VCE_State_Table states;
+}ATOM_PPLIB_VCE_Table;
+
+
+typedef struct _UVDClockInfo{
+    USHORT usVClkLow;
+    UCHAR  ucVClkHigh;
+    USHORT usDClkLow;
+    UCHAR  ucDClkHigh;
+}UVDClockInfo;
+
+typedef struct _UVDClockInfoArray{
+    UCHAR ucNumEntries;
+    UVDClockInfo entries[1];
+}UVDClockInfoArray;
+
+typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record
+{
+    USHORT usVoltage;
+    UCHAR  ucUVDClockInfoIndex;
+}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record;
+
+typedef struct _ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table
+{
+    UCHAR numEntries;
+    ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record entries[1];
+}ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table;
+
+typedef struct _ATOM_PPLIB_UVD_Table
+{
+      UCHAR revid;
+//    UVDClockInfoArray array;
+//    ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table limits;
+}ATOM_PPLIB_UVD_Table;
+
+typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Record
+{
+      USHORT usVoltage;
+      USHORT usSAMClockLow;
+      UCHAR  ucSAMClockHigh;
+}ATOM_PPLIB_SAMClk_Voltage_Limit_Record;
+
+typedef struct _ATOM_PPLIB_SAMClk_Voltage_Limit_Table{
+    UCHAR numEntries;
+    ATOM_PPLIB_SAMClk_Voltage_Limit_Record entries[1];
+}ATOM_PPLIB_SAMClk_Voltage_Limit_Table;
+
+typedef struct _ATOM_PPLIB_SAMU_Table
+{
+      UCHAR revid;
+      ATOM_PPLIB_SAMClk_Voltage_Limit_Table limits;
+}ATOM_PPLIB_SAMU_Table;
+
+typedef struct _ATOM_PPLIB_ACPClk_Voltage_Limit_Record
+{
+      USHORT usVoltage;
+      USHORT usACPClockLow;
+      UCHAR  ucACPClockHigh;
+}ATOM_PPLIB_ACPClk_Voltage_Limit_Record;
+
+typedef struct _ATOM_PPLIB_ACPClk_Voltage_Limit_Table{
+    UCHAR numEntries;
+    ATOM_PPLIB_ACPClk_Voltage_Limit_Record entries[1];
+}ATOM_PPLIB_ACPClk_Voltage_Limit_Table;
+
+typedef struct _ATOM_PPLIB_ACP_Table
+{
+      UCHAR revid;
+      ATOM_PPLIB_ACPClk_Voltage_Limit_Table limits;
+}ATOM_PPLIB_ACP_Table;
+
+typedef struct _ATOM_PowerTune_Table{
+    USHORT usTDP;
+    USHORT usConfigurableTDP;
+    USHORT usTDC;
+    USHORT usBatteryPowerLimit;
+    USHORT usSmallPowerLimit;
+    USHORT usLowCACLeakage;
+    USHORT usHighCACLeakage;
+}ATOM_PowerTune_Table;
+
+typedef struct _ATOM_PPLIB_POWERTUNE_Table
+{
+      UCHAR revid;
+      ATOM_PowerTune_Table power_tune_table;
+}ATOM_PPLIB_POWERTUNE_Table;
+
+typedef struct _ATOM_PPLIB_POWERTUNE_Table_V1
+{
+      UCHAR revid;
+      ATOM_PowerTune_Table power_tune_table;
+      USHORT usMaximumPowerDeliveryLimit;
+      USHORT usReserve[7];
+} ATOM_PPLIB_POWERTUNE_Table_V1;
+
+#define ATOM_PPM_A_A    1
+#define ATOM_PPM_A_I    2
+typedef struct _ATOM_PPLIB_PPM_Table
+{
+      UCHAR  ucRevId;
+      UCHAR  ucPpmDesign;          //A+I or A+A
+      USHORT usCpuCoreNumber;
+      ULONG  ulPlatformTDP;
+      ULONG  ulSmallACPlatformTDP;
+      ULONG  ulPlatformTDC;
+      ULONG  ulSmallACPlatformTDC;
+      ULONG  ulApuTDP;
+      ULONG  ulDGpuTDP;  
+      ULONG  ulDGpuUlvPower;
+      ULONG  ulTjmax;
+} ATOM_PPLIB_PPM_Table;
+
+#pragma pack(pop)
+
+#endif
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 75349cd..9fc61dd 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -1097,12 +1097,12 @@
 	}
 
 	/* Align ring size */
-	rb_bufsz = drm_order(ring_size / 8);
+	rb_bufsz = order_base_2(ring_size / 8);
 	ring_size = (1 << (rb_bufsz + 1)) * 4;
 	r100_cp_load_microcode(rdev);
 	r = radeon_ring_init(rdev, ring, ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     RADEON_CP_RB_RPTR, RADEON_CP_RB_WPTR,
-			     0, 0x7fffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r) {
 		return r;
 	}
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 393880a..ea4d3734 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1374,7 +1374,7 @@
 	return true;
 }
 
-static u32 r600_gpu_check_soft_reset(struct radeon_device *rdev)
+u32 r600_gpu_check_soft_reset(struct radeon_device *rdev)
 {
 	u32 reset_mask = 0;
 	u32 tmp;
@@ -1622,28 +1622,6 @@
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
-/**
- * r600_dma_is_lockup - Check if the DMA engine is locked up
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Check if the async DMA engine is locked up.
- * Returns true if the engine appears to be locked up, false if not.
- */
-bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	u32 reset_mask = r600_gpu_check_soft_reset(rdev);
-
-	if (!(reset_mask & RADEON_RESET_DMA)) {
-		radeon_ring_lockup_update(ring);
-		return false;
-	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
-	return radeon_ring_test_lockup(rdev, ring);
-}
-
 u32 r6xx_remap_render_backend(struct radeon_device *rdev,
 			      u32 tiling_pipe_num,
 			      u32 max_rb_num,
@@ -2299,9 +2277,13 @@
 	if ((rdev->family >= CHIP_RV770) && (rdev->family <= CHIP_HEMLOCK)) {
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", smc_chip_name);
 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
-		if (err)
-			goto out;
-		if (rdev->smc_fw->size != smc_req_size) {
+		if (err) {
+			printk(KERN_ERR
+			       "smc: error loading firmware \"%s\"\n",
+			       fw_name);
+			release_firmware(rdev->smc_fw);
+			rdev->smc_fw = NULL;
+		} else if (rdev->smc_fw->size != smc_req_size) {
 			printk(KERN_ERR
 			       "smc: Bogus length %zu in firmware \"%s\"\n",
 			       rdev->smc_fw->size, fw_name);
@@ -2413,8 +2395,8 @@
 	WREG32(GRBM_SOFT_RESET, 0);
 
 	/* Set ring buffer size */
-	rb_bufsz = drm_order(ring->ring_size / 8);
-	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+	rb_bufsz = order_base_2(ring->ring_size / 8);
+	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
 #endif
@@ -2467,7 +2449,7 @@
 	int r;
 
 	/* Align ring size */
-	rb_bufsz = drm_order(ring_size / 8);
+	rb_bufsz = order_base_2(ring_size / 8);
 	ring_size = (1 << (rb_bufsz + 1)) * 4;
 	ring->ring_size = ring_size;
 	ring->align_mask = 16 - 1;
@@ -2490,327 +2472,6 @@
 }
 
 /*
- * DMA
- * Starting with R600, the GPU has an asynchronous
- * DMA engine.  The programming model is very similar
- * to the 3D engine (ring buffer, IBs, etc.), but the
- * DMA controller has it's own packet format that is
- * different form the PM4 format used by the 3D engine.
- * It supports copying data, writing embedded data,
- * solid fills, and a number of other things.  It also
- * has support for tiling/detiling of buffers.
- */
-/**
- * r600_dma_stop - stop the async dma engine
- *
- * @rdev: radeon_device pointer
- *
- * Stop the async dma engine (r6xx-evergreen).
- */
-void r600_dma_stop(struct radeon_device *rdev)
-{
-	u32 rb_cntl = RREG32(DMA_RB_CNTL);
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-
-	rb_cntl &= ~DMA_RB_ENABLE;
-	WREG32(DMA_RB_CNTL, rb_cntl);
-
-	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
-}
-
-/**
- * r600_dma_resume - setup and start the async dma engine
- *
- * @rdev: radeon_device pointer
- *
- * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
- * Returns 0 for success, error for failure.
- */
-int r600_dma_resume(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
-	u32 rb_cntl, dma_cntl, ib_cntl;
-	u32 rb_bufsz;
-	int r;
-
-	/* Reset dma */
-	if (rdev->family >= CHIP_RV770)
-		WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
-	else
-		WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
-	RREG32(SRBM_SOFT_RESET);
-	udelay(50);
-	WREG32(SRBM_SOFT_RESET, 0);
-
-	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
-	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
-
-	/* Set ring buffer size in dwords */
-	rb_bufsz = drm_order(ring->ring_size / 4);
-	rb_cntl = rb_bufsz << 1;
-#ifdef __BIG_ENDIAN
-	rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
-#endif
-	WREG32(DMA_RB_CNTL, rb_cntl);
-
-	/* Initialize the ring buffer's read and write pointers */
-	WREG32(DMA_RB_RPTR, 0);
-	WREG32(DMA_RB_WPTR, 0);
-
-	/* set the wb address whether it's enabled or not */
-	WREG32(DMA_RB_RPTR_ADDR_HI,
-	       upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
-	WREG32(DMA_RB_RPTR_ADDR_LO,
-	       ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
-
-	if (rdev->wb.enabled)
-		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
-
-	WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
-
-	/* enable DMA IBs */
-	ib_cntl = DMA_IB_ENABLE;
-#ifdef __BIG_ENDIAN
-	ib_cntl |= DMA_IB_SWAP_ENABLE;
-#endif
-	WREG32(DMA_IB_CNTL, ib_cntl);
-
-	dma_cntl = RREG32(DMA_CNTL);
-	dma_cntl &= ~CTXEMPTY_INT_ENABLE;
-	WREG32(DMA_CNTL, dma_cntl);
-
-	if (rdev->family >= CHIP_RV770)
-		WREG32(DMA_MODE, 1);
-
-	ring->wptr = 0;
-	WREG32(DMA_RB_WPTR, ring->wptr << 2);
-
-	ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
-
-	WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
-
-	ring->ready = true;
-
-	r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
-	if (r) {
-		ring->ready = false;
-		return r;
-	}
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
-
-	return 0;
-}
-
-/**
- * r600_dma_fini - tear down the async dma engine
- *
- * @rdev: radeon_device pointer
- *
- * Stop the async dma engine and free the ring (r6xx-evergreen).
- */
-void r600_dma_fini(struct radeon_device *rdev)
-{
-	r600_dma_stop(rdev);
-	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
-}
-
-/*
- * UVD
- */
-int r600_uvd_rbc_start(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
-	uint64_t rptr_addr;
-	uint32_t rb_bufsz, tmp;
-	int r;
-
-	rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET;
-
-	if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) {
-		DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n");
-		return -EINVAL;
-	}
-
-	/* force RBC into idle state */
-	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
-
-	/* Set the write pointer delay */
-	WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
-
-	/* set the wb address */
-	WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2);
-
-	/* programm the 4GB memory segment for rptr and ring buffer */
-	WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) |
-				   (0x7 << 16) | (0x1 << 31));
-
-	/* Initialize the ring buffer's read and write pointers */
-	WREG32(UVD_RBC_RB_RPTR, 0x0);
-
-	ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
-	WREG32(UVD_RBC_RB_WPTR, ring->wptr);
-
-	/* set the ring address */
-	WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
-
-	/* Set ring buffer size */
-	rb_bufsz = drm_order(ring->ring_size);
-	rb_bufsz = (0x1 << 8) | rb_bufsz;
-	WREG32(UVD_RBC_RB_CNTL, rb_bufsz);
-
-	ring->ready = true;
-	r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
-	if (r) {
-		ring->ready = false;
-		return r;
-	}
-
-	r = radeon_ring_lock(rdev, ring, 10);
-	if (r) {
-		DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
-		return r;
-	}
-
-	tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
-	radeon_ring_write(ring, tmp);
-	radeon_ring_write(ring, 0xFFFFF);
-
-	tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
-	radeon_ring_write(ring, tmp);
-	radeon_ring_write(ring, 0xFFFFF);
-
-	tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
-	radeon_ring_write(ring, tmp);
-	radeon_ring_write(ring, 0xFFFFF);
-
-	/* Clear timeout status bits */
-	radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
-	radeon_ring_write(ring, 0x8);
-
-	radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
-	radeon_ring_write(ring, 3);
-
-	radeon_ring_unlock_commit(rdev, ring);
-
-	return 0;
-}
-
-void r600_uvd_rbc_stop(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
-
-	/* force RBC into idle state */
-	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
-	ring->ready = false;
-}
-
-int r600_uvd_init(struct radeon_device *rdev)
-{
-	int i, j, r;
-	/* disable byte swapping */
-	u32 lmi_swap_cntl = 0;
-	u32 mp_swap_cntl = 0;
-
-	/* raise clocks while booting up the VCPU */
-	radeon_set_uvd_clocks(rdev, 53300, 40000);
-
-	/* disable clock gating */
-	WREG32(UVD_CGC_GATE, 0);
-
-	/* disable interupt */
-	WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
-
-	/* put LMI, VCPU, RBC etc... into reset */
-	WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
-	       LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
-	       CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
-	mdelay(5);
-
-	/* take UVD block out of reset */
-	WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
-	mdelay(5);
-
-	/* initialize UVD memory controller */
-	WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
-			     (1 << 21) | (1 << 9) | (1 << 20));
-
-#ifdef __BIG_ENDIAN
-	/* swap (8 in 32) RB and IB */
-	lmi_swap_cntl = 0xa;
-	mp_swap_cntl = 0;
-#endif
-	WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl);
-	WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl);
-
-	WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
-	WREG32(UVD_MPC_SET_MUXA1, 0x0);
-	WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
-	WREG32(UVD_MPC_SET_MUXB1, 0x0);
-	WREG32(UVD_MPC_SET_ALU, 0);
-	WREG32(UVD_MPC_SET_MUX, 0x88);
-
-	/* Stall UMC */
-	WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
-	WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
-
-	/* take all subblocks out of reset, except VCPU */
-	WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
-	mdelay(5);
-
-	/* enable VCPU clock */
-	WREG32(UVD_VCPU_CNTL,  1 << 9);
-
-	/* enable UMC */
-	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
-
-	/* boot up the VCPU */
-	WREG32(UVD_SOFT_RESET, 0);
-	mdelay(10);
-
-	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
-
-	for (i = 0; i < 10; ++i) {
-		uint32_t status;
-		for (j = 0; j < 100; ++j) {
-			status = RREG32(UVD_STATUS);
-			if (status & 2)
-				break;
-			mdelay(10);
-		}
-		r = 0;
-		if (status & 2)
-			break;
-
-		DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
-		WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
-		mdelay(10);
-		WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
-		mdelay(10);
-		r = -1;
-	}
-
-	if (r) {
-		DRM_ERROR("UVD not responding, giving up!!!\n");
-		radeon_set_uvd_clocks(rdev, 0, 0);
-		return r;
-	}
-
-	/* enable interupt */
-	WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
-
-	r = r600_uvd_rbc_start(rdev);
-	if (!r)
-		DRM_INFO("UVD initialized successfully.\n");
-
-	/* lower clocks again */
-	radeon_set_uvd_clocks(rdev, 0, 0);
-
-	return r;
-}
-
-/*
  * GPU scratch registers helpers function.
  */
 void r600_scratch_init(struct radeon_device *rdev)
@@ -2865,94 +2526,6 @@
 	return r;
 }
 
-/**
- * r600_dma_ring_test - simple async dma engine test
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Test the DMA engine by writing using it to write an
- * value to memory. (r6xx-SI).
- * Returns 0 for success, error for failure.
- */
-int r600_dma_ring_test(struct radeon_device *rdev,
-		       struct radeon_ring *ring)
-{
-	unsigned i;
-	int r;
-	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
-	u32 tmp;
-
-	if (!ptr) {
-		DRM_ERROR("invalid vram scratch pointer\n");
-		return -EINVAL;
-	}
-
-	tmp = 0xCAFEDEAD;
-	writel(tmp, ptr);
-
-	r = radeon_ring_lock(rdev, ring, 4);
-	if (r) {
-		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
-		return r;
-	}
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
-	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
-	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
-	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
-
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = readl(ptr);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
-	} else {
-		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
-	return r;
-}
-
-int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	uint32_t tmp = 0;
-	unsigned i;
-	int r;
-
-	WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
-	r = radeon_ring_lock(rdev, ring, 3);
-	if (r) {
-		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
-			  ring->idx, r);
-		return r;
-	}
-	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
-	radeon_ring_write(ring, 0xDEADBEEF);
-	radeon_ring_unlock_commit(rdev, ring);
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = RREG32(UVD_CONTEXT_ID);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ring test on %d succeeded in %d usecs\n",
-			 ring->idx, i);
-	} else {
-		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
-			  ring->idx, tmp);
-		r = -EINVAL;
-	}
-	return r;
-}
-
 /*
  * CP fences/semaphores
  */
@@ -3004,30 +2577,6 @@
 	}
 }
 
-void r600_uvd_fence_emit(struct radeon_device *rdev,
-			 struct radeon_fence *fence)
-{
-	struct radeon_ring *ring = &rdev->ring[fence->ring];
-	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
-
-	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
-	radeon_ring_write(ring, fence->seq);
-	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
-	radeon_ring_write(ring, addr & 0xffffffff);
-	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
-	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
-	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
-	radeon_ring_write(ring, 0);
-
-	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
-	radeon_ring_write(ring, 2);
-	return;
-}
-
 void r600_semaphore_ring_emit(struct radeon_device *rdev,
 			      struct radeon_ring *ring,
 			      struct radeon_semaphore *semaphore,
@@ -3044,95 +2593,6 @@
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel);
 }
 
-/*
- * DMA fences/semaphores
- */
-
-/**
- * r600_dma_fence_ring_emit - emit a fence on the DMA ring
- *
- * @rdev: radeon_device pointer
- * @fence: radeon fence object
- *
- * Add a DMA fence packet to the ring to write
- * the fence seq number and DMA trap packet to generate
- * an interrupt if needed (r6xx-r7xx).
- */
-void r600_dma_fence_ring_emit(struct radeon_device *rdev,
-			      struct radeon_fence *fence)
-{
-	struct radeon_ring *ring = &rdev->ring[fence->ring];
-	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
-
-	/* write the fence */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
-	radeon_ring_write(ring, addr & 0xfffffffc);
-	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
-	radeon_ring_write(ring, lower_32_bits(fence->seq));
-	/* generate an interrupt */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
-}
-
-/**
- * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- * @semaphore: radeon semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (r6xx-SI).
- */
-void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
-				  struct radeon_ring *ring,
-				  struct radeon_semaphore *semaphore,
-				  bool emit_wait)
-{
-	u64 addr = semaphore->gpu_addr;
-	u32 s = emit_wait ? 0 : 1;
-
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
-	radeon_ring_write(ring, addr & 0xfffffffc);
-	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
-}
-
-void r600_uvd_semaphore_emit(struct radeon_device *rdev,
-			     struct radeon_ring *ring,
-			     struct radeon_semaphore *semaphore,
-			     bool emit_wait)
-{
-	uint64_t addr = semaphore->gpu_addr;
-
-	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
-	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
-	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
-	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
-	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
-	radeon_ring_write(ring, emit_wait ? 1 : 0);
-}
-
-int r600_copy_blit(struct radeon_device *rdev,
-		   uint64_t src_offset,
-		   uint64_t dst_offset,
-		   unsigned num_gpu_pages,
-		   struct radeon_fence **fence)
-{
-	struct radeon_semaphore *sem = NULL;
-	struct radeon_sa_bo *vb = NULL;
-	int r;
-
-	r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem);
-	if (r) {
-		return r;
-	}
-	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb);
-	r600_blit_done_copy(rdev, fence, vb, sem);
-	return 0;
-}
-
 /**
  * r600_copy_cpdma - copy pages using the CP DMA engine
  *
@@ -3166,7 +2626,7 @@
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
-	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 21);
+	r = radeon_ring_lock(rdev, ring, num_loops * 6 + 24);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
@@ -3181,6 +2641,9 @@
 		radeon_semaphore_free(rdev, &sem, NULL);
 	}
 
+	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
+	radeon_ring_write(ring, WAIT_3D_IDLE_bit);
 	for (i = 0; i < num_loops; i++) {
 		cur_size_in_bytes = size_in_bytes;
 		if (cur_size_in_bytes > 0x1fffff)
@@ -3214,80 +2677,6 @@
 	return r;
 }
 
-/**
- * r600_copy_dma - copy pages using the DMA engine
- *
- * @rdev: radeon_device pointer
- * @src_offset: src GPU address
- * @dst_offset: dst GPU address
- * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
- *
- * Copy GPU paging using the DMA engine (r6xx).
- * Used by the radeon ttm implementation to move pages if
- * registered as the asic copy callback.
- */
-int r600_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset, uint64_t dst_offset,
-		  unsigned num_gpu_pages,
-		  struct radeon_fence **fence)
-{
-	struct radeon_semaphore *sem = NULL;
-	int ring_index = rdev->asic->copy.dma_ring_index;
-	struct radeon_ring *ring = &rdev->ring[ring_index];
-	u32 size_in_dw, cur_size_in_dw;
-	int i, num_loops;
-	int r = 0;
-
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
-	}
-
-	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
-	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
-	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
-	}
-
-	if (radeon_fence_need_sync(*fence, ring->idx)) {
-		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-					    ring->idx);
-		radeon_fence_note_sync(*fence, ring->idx);
-	} else {
-		radeon_semaphore_free(rdev, &sem, NULL);
-	}
-
-	for (i = 0; i < num_loops; i++) {
-		cur_size_in_dw = size_in_dw;
-		if (cur_size_in_dw > 0xFFFE)
-			cur_size_in_dw = 0xFFFE;
-		size_in_dw -= cur_size_in_dw;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
-		radeon_ring_write(ring, dst_offset & 0xfffffffc);
-		radeon_ring_write(ring, src_offset & 0xfffffffc);
-		radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
-					 (upper_32_bits(src_offset) & 0xff)));
-		src_offset += cur_size_in_dw * 4;
-		dst_offset += cur_size_in_dw * 4;
-	}
-
-	r = radeon_fence_emit(rdev, fence, ring->idx);
-	if (r) {
-		radeon_ring_unlock_undo(rdev, ring);
-		return r;
-	}
-
-	radeon_ring_unlock_commit(rdev, ring);
-	radeon_semaphore_free(rdev, &sem, *fence);
-
-	return r;
-}
-
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
 			 uint32_t offset, uint32_t obj_size)
@@ -3309,6 +2698,13 @@
 	/* enable pcie gen2 link */
 	r600_pcie_gen2_enable(rdev);
 
+	/* scratch needs to be initialized before MC */
+	r = r600_vram_scratch_init(rdev);
+	if (r)
+		return r;
+
+	r600_mc_program(rdev);
+
 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
 		r = r600_init_microcode(rdev);
 		if (r) {
@@ -3317,11 +2713,6 @@
 		}
 	}
 
-	r = r600_vram_scratch_init(rdev);
-	if (r)
-		return r;
-
-	r600_mc_program(rdev);
 	if (rdev->flags & RADEON_IS_AGP) {
 		r600_agp_enable(rdev);
 	} else {
@@ -3330,12 +2721,6 @@
 			return r;
 	}
 	r600_gpu_init(rdev);
-	r = r600_blit_init(rdev);
-	if (r) {
-		r600_blit_fini(rdev);
-		rdev->asic->copy.copy = NULL;
-		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-	}
 
 	/* allocate wb buffer */
 	r = radeon_wb_init(rdev);
@@ -3372,14 +2757,14 @@
 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
-			     0, 0xfffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r)
 		return r;
 
 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
 			     DMA_RB_RPTR, DMA_RB_WPTR,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
 	if (r)
 		return r;
 
@@ -3548,7 +2933,6 @@
 void r600_fini(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
-	r600_blit_fini(rdev);
 	r600_cp_fini(rdev);
 	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
@@ -3600,16 +2984,6 @@
 	radeon_ring_write(ring, ib->length_dw);
 }
 
-void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
-{
-	struct radeon_ring *ring = &rdev->ring[ib->ring];
-
-	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
-	radeon_ring_write(ring, ib->gpu_addr);
-	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
-	radeon_ring_write(ring, ib->length_dw);
-}
-
 int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	struct radeon_ib ib;
@@ -3663,139 +3037,6 @@
 	return r;
 }
 
-/**
- * r600_dma_ib_test - test an IB on the DMA engine
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Test a simple IB in the DMA ring (r6xx-SI).
- * Returns 0 on success, error on failure.
- */
-int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	struct radeon_ib ib;
-	unsigned i;
-	int r;
-	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
-	u32 tmp = 0;
-
-	if (!ptr) {
-		DRM_ERROR("invalid vram scratch pointer\n");
-		return -EINVAL;
-	}
-
-	tmp = 0xCAFEDEAD;
-	writel(tmp, ptr);
-
-	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
-	if (r) {
-		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
-		return r;
-	}
-
-	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
-	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
-	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
-	ib.ptr[3] = 0xDEADBEEF;
-	ib.length_dw = 4;
-
-	r = radeon_ib_schedule(rdev, &ib, NULL);
-	if (r) {
-		radeon_ib_free(rdev, &ib);
-		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
-		return r;
-	}
-	r = radeon_fence_wait(ib.fence, false);
-	if (r) {
-		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
-		return r;
-	}
-	for (i = 0; i < rdev->usec_timeout; i++) {
-		tmp = readl(ptr);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < rdev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
-	} else {
-		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
-		r = -EINVAL;
-	}
-	radeon_ib_free(rdev, &ib);
-	return r;
-}
-
-int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	struct radeon_fence *fence = NULL;
-	int r;
-
-	r = radeon_set_uvd_clocks(rdev, 53300, 40000);
-	if (r) {
-		DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r);
-		return r;
-	}
-
-	r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
-	if (r) {
-		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
-		goto error;
-	}
-
-	r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
-	if (r) {
-		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
-		goto error;
-	}
-
-	r = radeon_fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
-		goto error;
-	}
-	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
-error:
-	radeon_fence_unref(&fence);
-	radeon_set_uvd_clocks(rdev, 0, 0);
-	return r;
-}
-
-/**
- * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
- *
- * @rdev: radeon_device pointer
- * @ib: IB object to schedule
- *
- * Schedule an IB in the DMA ring (r6xx-r7xx).
- */
-void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
-{
-	struct radeon_ring *ring = &rdev->ring[ib->ring];
-
-	if (rdev->wb.enabled) {
-		u32 next_rptr = ring->wptr + 4;
-		while ((next_rptr & 7) != 5)
-			next_rptr++;
-		next_rptr += 3;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
-		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
-		radeon_ring_write(ring, next_rptr);
-	}
-
-	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
-	 * Pad as necessary with NOPs.
-	 */
-	while ((ring->wptr & 7) != 5)
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
-	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
-	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
-
-}
-
 /*
  * Interrupts
  *
@@ -3812,7 +3053,7 @@
 	u32 rb_bufsz;
 
 	/* Align ring size */
-	rb_bufsz = drm_order(ring_size / 4);
+	rb_bufsz = order_base_2(ring_size / 4);
 	ring_size = (1 << rb_bufsz) * 4;
 	rdev->ih.ring_size = ring_size;
 	rdev->ih.ptr_mask = rdev->ih.ring_size - 1;
@@ -4049,7 +3290,7 @@
 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
 
 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
-	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
+	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
 
 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
 		      IH_WPTR_OVERFLOW_CLEAR |
diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c
index c92eb86..47fc2b8 100644
--- a/drivers/gpu/drm/radeon/r600_audio.c
+++ b/drivers/gpu/drm/radeon/r600_audio.c
@@ -57,12 +57,12 @@
  */
 static int r600_audio_chipset_supported(struct radeon_device *rdev)
 {
-	return ASIC_IS_DCE2(rdev) && !ASIC_IS_DCE6(rdev);
+	return ASIC_IS_DCE2(rdev) && !ASIC_IS_NODCE(rdev);
 }
 
-struct r600_audio r600_audio_status(struct radeon_device *rdev)
+struct r600_audio_pin r600_audio_status(struct radeon_device *rdev)
 {
-	struct r600_audio status;
+	struct r600_audio_pin status;
 	uint32_t value;
 
 	value = RREG32(R600_AUDIO_RATE_BPS_CHANNEL);
@@ -120,16 +120,16 @@
 	struct radeon_device *rdev = container_of(work, struct radeon_device,
 						  audio_work);
 	struct drm_device *dev = rdev->ddev;
-	struct r600_audio audio_status = r600_audio_status(rdev);
+	struct r600_audio_pin audio_status = r600_audio_status(rdev);
 	struct drm_encoder *encoder;
 	bool changed = false;
 
-	if (rdev->audio_status.channels != audio_status.channels ||
-	    rdev->audio_status.rate != audio_status.rate ||
-	    rdev->audio_status.bits_per_sample != audio_status.bits_per_sample ||
-	    rdev->audio_status.status_bits != audio_status.status_bits ||
-	    rdev->audio_status.category_code != audio_status.category_code) {
-		rdev->audio_status = audio_status;
+	if (rdev->audio.pin[0].channels != audio_status.channels ||
+	    rdev->audio.pin[0].rate != audio_status.rate ||
+	    rdev->audio.pin[0].bits_per_sample != audio_status.bits_per_sample ||
+	    rdev->audio.pin[0].status_bits != audio_status.status_bits ||
+	    rdev->audio.pin[0].category_code != audio_status.category_code) {
+		rdev->audio.pin[0] = audio_status;
 		changed = true;
 	}
 
@@ -141,13 +141,13 @@
 	}
 }
 
-/*
- * turn on/off audio engine
- */
-static void r600_audio_engine_enable(struct radeon_device *rdev, bool enable)
+/* enable the audio stream */
+static void r600_audio_enable(struct radeon_device *rdev,
+			      struct r600_audio_pin *pin,
+			      bool enable)
 {
 	u32 value = 0;
-	DRM_INFO("%s audio support\n", enable ? "Enabling" : "Disabling");
+
 	if (ASIC_IS_DCE4(rdev)) {
 		if (enable) {
 			value |= 0x81000000; /* Required to enable audio */
@@ -158,7 +158,7 @@
 		WREG32_P(R600_AUDIO_ENABLE,
 			 enable ? 0x81000000 : 0x0, ~0x81000000);
 	}
-	rdev->audio_enabled = enable;
+	DRM_INFO("%s audio %d support\n", enable ? "Enabling" : "Disabling", pin->id);
 }
 
 /*
@@ -169,13 +169,17 @@
 	if (!radeon_audio || !r600_audio_chipset_supported(rdev))
 		return 0;
 
-	r600_audio_engine_enable(rdev, true);
+	rdev->audio.enabled = true;
 
-	rdev->audio_status.channels = -1;
-	rdev->audio_status.rate = -1;
-	rdev->audio_status.bits_per_sample = -1;
-	rdev->audio_status.status_bits = 0;
-	rdev->audio_status.category_code = 0;
+	rdev->audio.num_pins = 1;
+	rdev->audio.pin[0].channels = -1;
+	rdev->audio.pin[0].rate = -1;
+	rdev->audio.pin[0].bits_per_sample = -1;
+	rdev->audio.pin[0].status_bits = 0;
+	rdev->audio.pin[0].category_code = 0;
+	rdev->audio.pin[0].id = 0;
+
+	r600_audio_enable(rdev, &rdev->audio.pin[0], true);
 
 	return 0;
 }
@@ -186,8 +190,16 @@
  */
 void r600_audio_fini(struct radeon_device *rdev)
 {
-	if (!rdev->audio_enabled)
+	if (!rdev->audio.enabled)
 		return;
 
-	r600_audio_engine_enable(rdev, false);
+	r600_audio_enable(rdev, &rdev->audio.pin[0], false);
+
+	rdev->audio.enabled = false;
+}
+
+struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev)
+{
+	/* only one pin on 6xx-NI */
+	return &rdev->audio.pin[0];
 }
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index f651881..daf7572 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -31,6 +31,37 @@
 
 #include "r600_blit_shaders.h"
 
+/* 23 bits of float fractional data */
+#define I2F_FRAC_BITS  23
+#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
+
+/*
+ * Converts unsigned integer into 32-bit IEEE floating point representation.
+ * Will be exact from 0 to 2^24.  Above that, we round towards zero
+ * as the fractional bits will not fit in a float.  (It would be better to
+ * round towards even as the fpu does, but that is slower.)
+ */
+static __pure uint32_t int2float(uint32_t x)
+{
+	uint32_t msb, exponent, fraction;
+
+	/* Zero is special */
+	if (!x) return 0;
+
+	/* Get location of the most significant bit */
+	msb = __fls(x);
+
+	/*
+	 * Use a rotate instead of a shift because that works both leftwards
+	 * and rightwards due to the mod(32) behaviour.  This means we don't
+	 * need to check to see if we are above 2^24 or not.
+	 */
+	fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
+	exponent = (127 + msb) << I2F_FRAC_BITS;
+
+	return fraction + exponent;
+}
+
 #define DI_PT_RECTLIST        0x11
 #define DI_INDEX_SIZE_16_BIT  0x0
 #define DI_SRC_SEL_AUTO_INDEX 0x2
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
deleted file mode 100644
index 9fb5780..0000000
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ /dev/null
@@ -1,785 +0,0 @@
-/*
- * Copyright 2009 Advanced Micro Devices, Inc.
- * Copyright 2009 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include <drm/drmP.h>
-#include <drm/radeon_drm.h>
-#include "radeon.h"
-
-#include "r600d.h"
-#include "r600_blit_shaders.h"
-#include "radeon_blit_common.h"
-
-/* 23 bits of float fractional data */
-#define I2F_FRAC_BITS  23
-#define I2F_MASK ((1 << I2F_FRAC_BITS) - 1)
-
-/*
- * Converts unsigned integer into 32-bit IEEE floating point representation.
- * Will be exact from 0 to 2^24.  Above that, we round towards zero
- * as the fractional bits will not fit in a float.  (It would be better to
- * round towards even as the fpu does, but that is slower.)
- */
-__pure uint32_t int2float(uint32_t x)
-{
-	uint32_t msb, exponent, fraction;
-
-	/* Zero is special */
-	if (!x) return 0;
-
-	/* Get location of the most significant bit */
-	msb = __fls(x);
-
-	/*
-	 * Use a rotate instead of a shift because that works both leftwards
-	 * and rightwards due to the mod(32) behaviour.  This means we don't
-	 * need to check to see if we are above 2^24 or not.
-	 */
-	fraction = ror32(x, (msb - I2F_FRAC_BITS) & 0x1f) & I2F_MASK;
-	exponent = (127 + msb) << I2F_FRAC_BITS;
-
-	return fraction + exponent;
-}
-
-/* emits 21 on rv770+, 23 on r600 */
-static void
-set_render_target(struct radeon_device *rdev, int format,
-		  int w, int h, u64 gpu_addr)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 cb_color_info;
-	int pitch, slice;
-
-	h = ALIGN(h, 8);
-	if (h < 8)
-		h = 8;
-
-	cb_color_info = CB_FORMAT(format) |
-		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
-		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
-	pitch = (w / 8) - 1;
-	slice = ((w * h) / 64) - 1;
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, gpu_addr >> 8);
-
-	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770) {
-		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_BASE_UPDATE, 0));
-		radeon_ring_write(ring, 2 << 0);
-	}
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (CB_COLOR0_SIZE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, (pitch << 0) | (slice << 10));
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (CB_COLOR0_VIEW - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, 0);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (CB_COLOR0_INFO - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, cb_color_info);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (CB_COLOR0_TILE - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, 0);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (CB_COLOR0_FRAG - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, 0);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (CB_COLOR0_MASK - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, 0);
-}
-
-/* emits 5dw */
-static void
-cp_set_surface_sync(struct radeon_device *rdev,
-		    u32 sync_type, u32 size,
-		    u64 mc_addr)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 cp_coher_size;
-
-	if (size == 0xffffffff)
-		cp_coher_size = 0xffffffff;
-	else
-		cp_coher_size = ((size + 255) >> 8);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
-	radeon_ring_write(ring, sync_type);
-	radeon_ring_write(ring, cp_coher_size);
-	radeon_ring_write(ring, mc_addr >> 8);
-	radeon_ring_write(ring, 10); /* poll interval */
-}
-
-/* emits 21dw + 1 surface sync = 26dw */
-static void
-set_shaders(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u64 gpu_addr;
-	u32 sq_pgm_resources;
-
-	/* setup shader regs */
-	sq_pgm_resources = (1 << 0);
-
-	/* VS */
-	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, gpu_addr >> 8);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (SQ_PGM_RESOURCES_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, sq_pgm_resources);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_VS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, 0);
-
-	/* PS */
-	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, gpu_addr >> 8);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (SQ_PGM_RESOURCES_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, sq_pgm_resources | (1 << 28));
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (SQ_PGM_EXPORTS_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, 2);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
-	radeon_ring_write(ring, (SQ_PGM_CF_OFFSET_PS - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, 0);
-
-	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
-	cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
-}
-
-/* emits 9 + 1 sync (5) = 14*/
-static void
-set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 sq_vtx_constant_word2;
-
-	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
-		SQ_VTXC_STRIDE(16);
-#ifdef __BIG_ENDIAN
-	sq_vtx_constant_word2 |=  SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
-#endif
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7));
-	radeon_ring_write(ring, 0x460);
-	radeon_ring_write(ring, gpu_addr & 0xffffffff);
-	radeon_ring_write(ring, 48 - 1);
-	radeon_ring_write(ring, sq_vtx_constant_word2);
-	radeon_ring_write(ring, 1 << 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, SQ_TEX_VTX_VALID_BUFFER << 30);
-
-	if ((rdev->family == CHIP_RV610) ||
-	    (rdev->family == CHIP_RV620) ||
-	    (rdev->family == CHIP_RS780) ||
-	    (rdev->family == CHIP_RS880) ||
-	    (rdev->family == CHIP_RV710))
-		cp_set_surface_sync(rdev,
-				    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
-	else
-		cp_set_surface_sync(rdev,
-				    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
-}
-
-/* emits 9 */
-static void
-set_tex_resource(struct radeon_device *rdev,
-		 int format, int w, int h, int pitch,
-		 u64 gpu_addr, u32 size)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4;
-
-	if (h < 1)
-		h = 1;
-
-	sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) |
-		S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
-	sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) |
-		S_038000_TEX_WIDTH(w - 1);
-
-	sq_tex_resource_word1 = S_038004_DATA_FORMAT(format);
-	sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1);
-
-	sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) |
-		S_038010_DST_SEL_X(SQ_SEL_X) |
-		S_038010_DST_SEL_Y(SQ_SEL_Y) |
-		S_038010_DST_SEL_Z(SQ_SEL_Z) |
-		S_038010_DST_SEL_W(SQ_SEL_W);
-
-	cp_set_surface_sync(rdev,
-			    PACKET3_TC_ACTION_ENA, size, gpu_addr);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_RESOURCE, 7));
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, sq_tex_resource_word0);
-	radeon_ring_write(ring, sq_tex_resource_word1);
-	radeon_ring_write(ring, gpu_addr >> 8);
-	radeon_ring_write(ring, gpu_addr >> 8);
-	radeon_ring_write(ring, sq_tex_resource_word4);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, SQ_TEX_VTX_VALID_TEXTURE << 30);
-}
-
-/* emits 12 */
-static void
-set_scissors(struct radeon_device *rdev, int x1, int y1,
-	     int x2, int y2)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(ring, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, (x1 << 0) | (y1 << 16));
-	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(ring, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
-	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
-
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
-	radeon_ring_write(ring, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, (x1 << 0) | (y1 << 16) | (1 << 31));
-	radeon_ring_write(ring, (x2 << 0) | (y2 << 16));
-}
-
-/* emits 10 */
-static void
-draw_auto(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(ring, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, DI_PT_RECTLIST);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_INDEX_TYPE, 0));
-	radeon_ring_write(ring,
-#ifdef __BIG_ENDIAN
-			  (2 << 2) |
-#endif
-			  DI_INDEX_SIZE_16_BIT);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_NUM_INSTANCES, 0));
-	radeon_ring_write(ring, 1);
-
-	radeon_ring_write(ring, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
-	radeon_ring_write(ring, 3);
-	radeon_ring_write(ring, DI_SRC_SEL_AUTO_INDEX);
-
-}
-
-/* emits 14 */
-static void
-set_default_state(struct radeon_device *rdev)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2;
-	u32 sq_thread_resource_mgmt, sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2;
-	int num_ps_gprs, num_vs_gprs, num_temp_gprs, num_gs_gprs, num_es_gprs;
-	int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
-	int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
-	u64 gpu_addr;
-	int dwords;
-
-	switch (rdev->family) {
-	case CHIP_R600:
-		num_ps_gprs = 192;
-		num_vs_gprs = 56;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 136;
-		num_vs_threads = 48;
-		num_gs_threads = 4;
-		num_es_threads = 4;
-		num_ps_stack_entries = 128;
-		num_vs_stack_entries = 128;
-		num_gs_stack_entries = 0;
-		num_es_stack_entries = 0;
-		break;
-	case CHIP_RV630:
-	case CHIP_RV635:
-		num_ps_gprs = 84;
-		num_vs_gprs = 36;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 144;
-		num_vs_threads = 40;
-		num_gs_threads = 4;
-		num_es_threads = 4;
-		num_ps_stack_entries = 40;
-		num_vs_stack_entries = 40;
-		num_gs_stack_entries = 32;
-		num_es_stack_entries = 16;
-		break;
-	case CHIP_RV610:
-	case CHIP_RV620:
-	case CHIP_RS780:
-	case CHIP_RS880:
-	default:
-		num_ps_gprs = 84;
-		num_vs_gprs = 36;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 136;
-		num_vs_threads = 48;
-		num_gs_threads = 4;
-		num_es_threads = 4;
-		num_ps_stack_entries = 40;
-		num_vs_stack_entries = 40;
-		num_gs_stack_entries = 32;
-		num_es_stack_entries = 16;
-		break;
-	case CHIP_RV670:
-		num_ps_gprs = 144;
-		num_vs_gprs = 40;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 136;
-		num_vs_threads = 48;
-		num_gs_threads = 4;
-		num_es_threads = 4;
-		num_ps_stack_entries = 40;
-		num_vs_stack_entries = 40;
-		num_gs_stack_entries = 32;
-		num_es_stack_entries = 16;
-		break;
-	case CHIP_RV770:
-		num_ps_gprs = 192;
-		num_vs_gprs = 56;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 188;
-		num_vs_threads = 60;
-		num_gs_threads = 0;
-		num_es_threads = 0;
-		num_ps_stack_entries = 256;
-		num_vs_stack_entries = 256;
-		num_gs_stack_entries = 0;
-		num_es_stack_entries = 0;
-		break;
-	case CHIP_RV730:
-	case CHIP_RV740:
-		num_ps_gprs = 84;
-		num_vs_gprs = 36;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 188;
-		num_vs_threads = 60;
-		num_gs_threads = 0;
-		num_es_threads = 0;
-		num_ps_stack_entries = 128;
-		num_vs_stack_entries = 128;
-		num_gs_stack_entries = 0;
-		num_es_stack_entries = 0;
-		break;
-	case CHIP_RV710:
-		num_ps_gprs = 192;
-		num_vs_gprs = 56;
-		num_temp_gprs = 4;
-		num_gs_gprs = 0;
-		num_es_gprs = 0;
-		num_ps_threads = 144;
-		num_vs_threads = 48;
-		num_gs_threads = 0;
-		num_es_threads = 0;
-		num_ps_stack_entries = 128;
-		num_vs_stack_entries = 128;
-		num_gs_stack_entries = 0;
-		num_es_stack_entries = 0;
-		break;
-	}
-
-	if ((rdev->family == CHIP_RV610) ||
-	    (rdev->family == CHIP_RV620) ||
-	    (rdev->family == CHIP_RS780) ||
-	    (rdev->family == CHIP_RS880) ||
-	    (rdev->family == CHIP_RV710))
-		sq_config = 0;
-	else
-		sq_config = VC_ENABLE;
-
-	sq_config |= (DX9_CONSTS |
-		      ALU_INST_PREFER_VECTOR |
-		      PS_PRIO(0) |
-		      VS_PRIO(1) |
-		      GS_PRIO(2) |
-		      ES_PRIO(3));
-
-	sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
-				  NUM_VS_GPRS(num_vs_gprs) |
-				  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
-	sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
-				  NUM_ES_GPRS(num_es_gprs));
-	sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
-				   NUM_VS_THREADS(num_vs_threads) |
-				   NUM_GS_THREADS(num_gs_threads) |
-				   NUM_ES_THREADS(num_es_threads));
-	sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
-				    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
-	sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
-				    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
-
-	/* emit an IB pointing at default state */
-	dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
-	gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
-	radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
-	radeon_ring_write(ring,
-#ifdef __BIG_ENDIAN
-			  (2 << 0) |
-#endif
-			  (gpu_addr & 0xFFFFFFFC));
-	radeon_ring_write(ring, upper_32_bits(gpu_addr) & 0xFF);
-	radeon_ring_write(ring, dwords);
-
-	/* SQ config */
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 6));
-	radeon_ring_write(ring, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
-	radeon_ring_write(ring, sq_config);
-	radeon_ring_write(ring, sq_gpr_resource_mgmt_1);
-	radeon_ring_write(ring, sq_gpr_resource_mgmt_2);
-	radeon_ring_write(ring, sq_thread_resource_mgmt);
-	radeon_ring_write(ring, sq_stack_resource_mgmt_1);
-	radeon_ring_write(ring, sq_stack_resource_mgmt_2);
-}
-
-int r600_blit_init(struct radeon_device *rdev)
-{
-	u32 obj_size;
-	int i, r, dwords;
-	void *ptr;
-	u32 packet2s[16];
-	int num_packet2s = 0;
-
-	rdev->r600_blit.primitives.set_render_target = set_render_target;
-	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
-	rdev->r600_blit.primitives.set_shaders = set_shaders;
-	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
-	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
-	rdev->r600_blit.primitives.set_scissors = set_scissors;
-	rdev->r600_blit.primitives.draw_auto = draw_auto;
-	rdev->r600_blit.primitives.set_default_state = set_default_state;
-
-	rdev->r600_blit.ring_size_common = 8; /* sync semaphore */
-	rdev->r600_blit.ring_size_common += 40; /* shaders + def state */
-	rdev->r600_blit.ring_size_common += 5; /* done copy */
-	rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */
-
-	rdev->r600_blit.ring_size_per_loop = 76;
-	/* set_render_target emits 2 extra dwords on rv6xx */
-	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
-		rdev->r600_blit.ring_size_per_loop += 2;
-
-	rdev->r600_blit.max_dim = 8192;
-
-	rdev->r600_blit.state_offset = 0;
-
-	if (rdev->family >= CHIP_RV770)
-		rdev->r600_blit.state_len = r7xx_default_size;
-	else
-		rdev->r600_blit.state_len = r6xx_default_size;
-
-	dwords = rdev->r600_blit.state_len;
-	while (dwords & 0xf) {
-		packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
-		dwords++;
-	}
-
-	obj_size = dwords * 4;
-	obj_size = ALIGN(obj_size, 256);
-
-	rdev->r600_blit.vs_offset = obj_size;
-	obj_size += r6xx_vs_size * 4;
-	obj_size = ALIGN(obj_size, 256);
-
-	rdev->r600_blit.ps_offset = obj_size;
-	obj_size += r6xx_ps_size * 4;
-	obj_size = ALIGN(obj_size, 256);
-
-	/* pin copy shader into vram if not already initialized */
-	if (rdev->r600_blit.shader_obj == NULL) {
-		r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_VRAM,
-				     NULL, &rdev->r600_blit.shader_obj);
-		if (r) {
-			DRM_ERROR("r600 failed to allocate shader\n");
-			return r;
-		}
-
-		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-		if (unlikely(r != 0))
-			return r;
-		r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
-				  &rdev->r600_blit.shader_gpu_addr);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-		if (r) {
-			dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
-			return r;
-		}
-	}
-
-	DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n",
-		  obj_size,
-		  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);
-
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (unlikely(r != 0))
-		return r;
-	r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
-	if (r) {
-		DRM_ERROR("failed to map blit object %d\n", r);
-		return r;
-	}
-	if (rdev->family >= CHIP_RV770)
-		memcpy_toio(ptr + rdev->r600_blit.state_offset,
-			    r7xx_default_state, rdev->r600_blit.state_len * 4);
-	else
-		memcpy_toio(ptr + rdev->r600_blit.state_offset,
-			    r6xx_default_state, rdev->r600_blit.state_len * 4);
-	if (num_packet2s)
-		memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
-			    packet2s, num_packet2s * 4);
-	for (i = 0; i < r6xx_vs_size; i++)
-		*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(r6xx_vs[i]);
-	for (i = 0; i < r6xx_ps_size; i++)
-		*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(r6xx_ps[i]);
-	radeon_bo_kunmap(rdev->r600_blit.shader_obj);
-	radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
-	return 0;
-}
-
-void r600_blit_fini(struct radeon_device *rdev)
-{
-	int r;
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-	if (rdev->r600_blit.shader_obj == NULL)
-		return;
-	/* If we can't reserve the bo, unref should be enough to destroy
-	 * it when it becomes idle.
-	 */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (!r) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
-	radeon_bo_unref(&rdev->r600_blit.shader_obj);
-}
-
-static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
-				      int *width, int *height, int max_dim)
-{
-	unsigned max_pages;
-	unsigned pages = num_gpu_pages;
-	int w, h;
-
-	if (num_gpu_pages == 0) {
-		/* not supposed to be called with no pages, but just in case */
-		h = 0;
-		w = 0;
-		pages = 0;
-		WARN_ON(1);
-	} else {
-		int rect_order = 2;
-		h = RECT_UNIT_H;
-		while (num_gpu_pages / rect_order) {
-			h *= 2;
-			rect_order *= 4;
-			if (h >= max_dim) {
-				h = max_dim;
-				break;
-			}
-		}
-		max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H);
-		if (pages > max_pages)
-			pages = max_pages;
-		w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
-		w = (w / RECT_UNIT_W) * RECT_UNIT_W;
-		pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
-		BUG_ON(pages == 0);
-	}
-
-
-	DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages);
-
-	/* return width and height only of the caller wants it */
-	if (height)
-		*height = h;
-	if (width)
-		*width = w;
-
-	return pages;
-}
-
-
-int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
-			   struct radeon_fence **fence, struct radeon_sa_bo **vb,
-			   struct radeon_semaphore **sem)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	int r;
-	int ring_size;
-	int num_loops = 0;
-	int dwords_per_loop = rdev->r600_blit.ring_size_per_loop;
-
-	/* num loops */
-	while (num_gpu_pages) {
-		num_gpu_pages -=
-			r600_blit_create_rect(num_gpu_pages, NULL, NULL,
-					      rdev->r600_blit.max_dim);
-		num_loops++;
-	}
-
-	/* 48 bytes for vertex per loop */
-	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, vb,
-			     (num_loops*48)+256, 256, true);
-	if (r) {
-		return r;
-	}
-
-	r = radeon_semaphore_create(rdev, sem);
-	if (r) {
-		radeon_sa_bo_free(rdev, vb, NULL);
-		return r;
-	}
-
-	/* calculate number of loops correctly */
-	ring_size = num_loops * dwords_per_loop;
-	ring_size += rdev->r600_blit.ring_size_common;
-	r = radeon_ring_lock(rdev, ring, ring_size);
-	if (r) {
-		radeon_sa_bo_free(rdev, vb, NULL);
-		radeon_semaphore_free(rdev, sem, NULL);
-		return r;
-	}
-
-	if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) {
-		radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring,
-					    RADEON_RING_TYPE_GFX_INDEX);
-		radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX);
-	} else {
-		radeon_semaphore_free(rdev, sem, NULL);
-	}
-
-	rdev->r600_blit.primitives.set_default_state(rdev);
-	rdev->r600_blit.primitives.set_shaders(rdev);
-	return 0;
-}
-
-void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence,
-			 struct radeon_sa_bo *vb, struct radeon_semaphore *sem)
-{
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	int r;
-
-	r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
-	if (r) {
-		radeon_ring_unlock_undo(rdev, ring);
-		return;
-	}
-
-	radeon_ring_unlock_commit(rdev, ring);
-	radeon_sa_bo_free(rdev, &vb, *fence);
-	radeon_semaphore_free(rdev, &sem, *fence);
-}
-
-void r600_kms_blit_copy(struct radeon_device *rdev,
-			u64 src_gpu_addr, u64 dst_gpu_addr,
-			unsigned num_gpu_pages,
-			struct radeon_sa_bo *vb)
-{
-	u64 vb_gpu_addr;
-	u32 *vb_cpu_addr;
-
-	DRM_DEBUG("emitting copy %16llx %16llx %d\n",
-		  src_gpu_addr, dst_gpu_addr, num_gpu_pages);
-	vb_cpu_addr = (u32 *)radeon_sa_bo_cpu_addr(vb);
-	vb_gpu_addr = radeon_sa_bo_gpu_addr(vb);
-
-	while (num_gpu_pages) {
-		int w, h;
-		unsigned size_in_bytes;
-		unsigned pages_per_loop =
-			r600_blit_create_rect(num_gpu_pages, &w, &h,
-					      rdev->r600_blit.max_dim);
-
-		size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
-		DRM_DEBUG("rectangle w=%d h=%d\n", w, h);
-
-		vb_cpu_addr[0] = 0;
-		vb_cpu_addr[1] = 0;
-		vb_cpu_addr[2] = 0;
-		vb_cpu_addr[3] = 0;
-
-		vb_cpu_addr[4] = 0;
-		vb_cpu_addr[5] = int2float(h);
-		vb_cpu_addr[6] = 0;
-		vb_cpu_addr[7] = int2float(h);
-
-		vb_cpu_addr[8] = int2float(w);
-		vb_cpu_addr[9] = int2float(h);
-		vb_cpu_addr[10] = int2float(w);
-		vb_cpu_addr[11] = int2float(h);
-
-		rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8,
-							    w, h, w, src_gpu_addr, size_in_bytes);
-		rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8,
-							     w, h, dst_gpu_addr);
-		rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h);
-		rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr);
-		rdev->r600_blit.primitives.draw_auto(rdev);
-		rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
-				    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-				    size_in_bytes, dst_gpu_addr);
-
-		vb_cpu_addr += 12;
-		vb_gpu_addr += 4*12;
-		src_gpu_addr += size_in_bytes;
-		dst_gpu_addr += size_in_bytes;
-		num_gpu_pages -= pages_per_loop;
-	}
-}
diff --git a/drivers/gpu/drm/radeon/r600_blit_shaders.h b/drivers/gpu/drm/radeon/r600_blit_shaders.h
index 2f3ce7a..f437d36 100644
--- a/drivers/gpu/drm/radeon/r600_blit_shaders.h
+++ b/drivers/gpu/drm/radeon/r600_blit_shaders.h
@@ -35,5 +35,4 @@
 extern const u32 r6xx_ps_size, r6xx_vs_size;
 extern const u32 r6xx_default_size, r7xx_default_size;
 
-__pure uint32_t int2float(uint32_t x);
 #endif
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 1c51c08..d8eb48b 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -2200,13 +2200,13 @@
 	dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
 			      + init->ring_size / sizeof(u32));
 	dev_priv->ring.size = init->ring_size;
-	dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);
+	dev_priv->ring.size_l2qw = order_base_2(init->ring_size / 8);
 
 	dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
-	dev_priv->ring.rptr_update_l2qw = drm_order(/* init->rptr_update */ 4096 / 8);
+	dev_priv->ring.rptr_update_l2qw = order_base_2(/* init->rptr_update */ 4096 / 8);
 
 	dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
-	dev_priv->ring.fetch_size_l2ow = drm_order(/* init->fetch_size */ 32 / 16);
+	dev_priv->ring.fetch_size_l2ow = order_base_2(/* init->fetch_size */ 32 / 16);
 
 	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
 
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
new file mode 100644
index 0000000..3b31745
--- /dev/null
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -0,0 +1,497 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "r600d.h"
+
+u32 r600_gpu_check_soft_reset(struct radeon_device *rdev);
+
+/*
+ * DMA
+ * Starting with R600, the GPU has an asynchronous
+ * DMA engine.  The programming model is very similar
+ * to the 3D engine (ring buffer, IBs, etc.), but the
+ * DMA controller has it's own packet format that is
+ * different form the PM4 format used by the 3D engine.
+ * It supports copying data, writing embedded data,
+ * solid fills, and a number of other things.  It also
+ * has support for tiling/detiling of buffers.
+ */
+
+/**
+ * r600_dma_get_rptr - get the current read pointer
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon ring pointer
+ *
+ * Get the current rptr from the hardware (r6xx+).
+ */
+uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
+			   struct radeon_ring *ring)
+{
+	return (radeon_ring_generic_get_rptr(rdev, ring) & 0x3fffc) >> 2;
+}
+
+/**
+ * r600_dma_get_wptr - get the current write pointer
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon ring pointer
+ *
+ * Get the current wptr from the hardware (r6xx+).
+ */
+uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
+			   struct radeon_ring *ring)
+{
+	return (RREG32(ring->wptr_reg) & 0x3fffc) >> 2;
+}
+
+/**
+ * r600_dma_set_wptr - commit the write pointer
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon ring pointer
+ *
+ * Write the wptr back to the hardware (r6xx+).
+ */
+void r600_dma_set_wptr(struct radeon_device *rdev,
+		       struct radeon_ring *ring)
+{
+	WREG32(ring->wptr_reg, (ring->wptr << 2) & 0x3fffc);
+}
+
+/**
+ * r600_dma_stop - stop the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine (r6xx-evergreen).
+ */
+void r600_dma_stop(struct radeon_device *rdev)
+{
+	u32 rb_cntl = RREG32(DMA_RB_CNTL);
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
+
+	rb_cntl &= ~DMA_RB_ENABLE;
+	WREG32(DMA_RB_CNTL, rb_cntl);
+
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+}
+
+/**
+ * r600_dma_resume - setup and start the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Set up the DMA ring buffer and enable it. (r6xx-evergreen).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_resume(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
+	u32 rb_cntl, dma_cntl, ib_cntl;
+	u32 rb_bufsz;
+	int r;
+
+	/* Reset dma */
+	if (rdev->family >= CHIP_RV770)
+		WREG32(SRBM_SOFT_RESET, RV770_SOFT_RESET_DMA);
+	else
+		WREG32(SRBM_SOFT_RESET, SOFT_RESET_DMA);
+	RREG32(SRBM_SOFT_RESET);
+	udelay(50);
+	WREG32(SRBM_SOFT_RESET, 0);
+
+	WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL, 0);
+	WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
+
+	/* Set ring buffer size in dwords */
+	rb_bufsz = order_base_2(ring->ring_size / 4);
+	rb_cntl = rb_bufsz << 1;
+#ifdef __BIG_ENDIAN
+	rb_cntl |= DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE;
+#endif
+	WREG32(DMA_RB_CNTL, rb_cntl);
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(DMA_RB_RPTR, 0);
+	WREG32(DMA_RB_WPTR, 0);
+
+	/* set the wb address whether it's enabled or not */
+	WREG32(DMA_RB_RPTR_ADDR_HI,
+	       upper_32_bits(rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFF);
+	WREG32(DMA_RB_RPTR_ADDR_LO,
+	       ((rdev->wb.gpu_addr + R600_WB_DMA_RPTR_OFFSET) & 0xFFFFFFFC));
+
+	if (rdev->wb.enabled)
+		rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE;
+
+	WREG32(DMA_RB_BASE, ring->gpu_addr >> 8);
+
+	/* enable DMA IBs */
+	ib_cntl = DMA_IB_ENABLE;
+#ifdef __BIG_ENDIAN
+	ib_cntl |= DMA_IB_SWAP_ENABLE;
+#endif
+	WREG32(DMA_IB_CNTL, ib_cntl);
+
+	dma_cntl = RREG32(DMA_CNTL);
+	dma_cntl &= ~CTXEMPTY_INT_ENABLE;
+	WREG32(DMA_CNTL, dma_cntl);
+
+	if (rdev->family >= CHIP_RV770)
+		WREG32(DMA_MODE, 1);
+
+	ring->wptr = 0;
+	WREG32(DMA_RB_WPTR, ring->wptr << 2);
+
+	ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
+
+	WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);
+
+	ring->ready = true;
+
+	r = radeon_ring_test(rdev, R600_RING_TYPE_DMA_INDEX, ring);
+	if (r) {
+		ring->ready = false;
+		return r;
+	}
+
+	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
+
+	return 0;
+}
+
+/**
+ * r600_dma_fini - tear down the async dma engine
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the async dma engine and free the ring (r6xx-evergreen).
+ */
+void r600_dma_fini(struct radeon_device *rdev)
+{
+	r600_dma_stop(rdev);
+	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
+}
+
+/**
+ * r600_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up.
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 reset_mask = r600_gpu_check_soft_reset(rdev);
+
+	if (!(reset_mask & RADEON_RESET_DMA)) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
+
+/**
+ * r600_dma_ring_test - simple async dma engine test
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test the DMA engine by writing using it to write an
+ * value to memory. (r6xx-SI).
+ * Returns 0 for success, error for failure.
+ */
+int r600_dma_ring_test(struct radeon_device *rdev,
+		       struct radeon_ring *ring)
+{
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ring_lock(rdev, ring, 4);
+	if (r) {
+		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
+		return r;
+	}
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
+	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
+	} else {
+		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+			  ring->idx, tmp);
+		r = -EINVAL;
+	}
+	return r;
+}
+
+/**
+ * r600_dma_fence_ring_emit - emit a fence on the DMA ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: radeon fence object
+ *
+ * Add a DMA fence packet to the ring to write
+ * the fence seq number and DMA trap packet to generate
+ * an interrupt if needed (r6xx-r7xx).
+ */
+void r600_dma_fence_ring_emit(struct radeon_device *rdev,
+			      struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	/* write the fence */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_FENCE, 0, 0, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff));
+	radeon_ring_write(ring, lower_32_bits(fence->seq));
+	/* generate an interrupt */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_TRAP, 0, 0, 0));
+}
+
+/**
+ * r600_dma_semaphore_ring_emit - emit a semaphore on the dma ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ * @semaphore: radeon semaphore object
+ * @emit_wait: wait or signal semaphore
+ *
+ * Add a DMA semaphore packet to the ring wait on or signal
+ * other rings (r6xx-SI).
+ */
+void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
+				  struct radeon_ring *ring,
+				  struct radeon_semaphore *semaphore,
+				  bool emit_wait)
+{
+	u64 addr = semaphore->gpu_addr;
+	u32 s = emit_wait ? 0 : 1;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SEMAPHORE, 0, s, 0));
+	radeon_ring_write(ring, addr & 0xfffffffc);
+	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+}
+
+/**
+ * r600_dma_ib_test - test an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Test a simple IB in the DMA ring (r6xx-SI).
+ * Returns 0 on success, error on failure.
+ */
+int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_ib ib;
+	unsigned i;
+	int r;
+	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
+	u32 tmp = 0;
+
+	if (!ptr) {
+		DRM_ERROR("invalid vram scratch pointer\n");
+		return -EINVAL;
+	}
+
+	tmp = 0xCAFEDEAD;
+	writel(tmp, ptr);
+
+	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		return r;
+	}
+
+	ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1);
+	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
+	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff;
+	ib.ptr[3] = 0xDEADBEEF;
+	ib.length_dw = 4;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+		return r;
+	}
+	r = radeon_fence_wait(ib.fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		return r;
+	}
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = readl(ptr);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
+	} else {
+		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
+		r = -EINVAL;
+	}
+	radeon_ib_free(rdev, &ib);
+	return r;
+}
+
+/**
+ * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @ib: IB object to schedule
+ *
+ * Schedule an IB in the DMA ring (r6xx-r7xx).
+ */
+void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	if (rdev->wb.enabled) {
+		u32 next_rptr = ring->wptr + 4;
+		while ((next_rptr & 7) != 5)
+			next_rptr++;
+		next_rptr += 3;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1));
+		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff);
+		radeon_ring_write(ring, next_rptr);
+	}
+
+	/* The indirect buffer packet must end on an 8 DW boundary in the DMA ring.
+	 * Pad as necessary with NOPs.
+	 */
+	while ((ring->wptr & 7) != 5)
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_INDIRECT_BUFFER, 0, 0, 0));
+	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
+	radeon_ring_write(ring, (ib->length_dw << 16) | (upper_32_bits(ib->gpu_addr) & 0xFF));
+
+}
+
+/**
+ * r600_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (r6xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int r600_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset, uint64_t dst_offset,
+		  unsigned num_gpu_pages,
+		  struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_dw, cur_size_in_dw;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
+	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_dw = size_in_dw;
+		if (cur_size_in_dw > 0xFFFE)
+			cur_size_in_dw = 0xFFFE;
+		size_in_dw -= cur_size_in_dw;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, src_offset & 0xfffffffc);
+		radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
+					 (upper_32_bits(src_offset) & 0xff)));
+		src_offset += cur_size_in_dw * 4;
+		dst_offset += cur_size_in_dw * 4;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/r600_dpm.c b/drivers/gpu/drm/radeon/r600_dpm.c
index b88f54b..fa0de46 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@@ -174,6 +174,24 @@
 	return vblank_time_us;
 }
 
+u32 r600_dpm_get_vrefresh(struct radeon_device *rdev)
+{
+	struct drm_device *dev = rdev->ddev;
+	struct drm_crtc *crtc;
+	struct radeon_crtc *radeon_crtc;
+	u32 vrefresh = 0;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+		radeon_crtc = to_radeon_crtc(crtc);
+		if (crtc->enabled && radeon_crtc->enabled && radeon_crtc->hw_mode.clock) {
+			vrefresh = radeon_crtc->hw_mode.vrefresh;
+			break;
+		}
+	}
+
+	return vrefresh;
+}
+
 void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
 			    u32 *p, u32 *u)
 {
@@ -278,9 +296,9 @@
 void r600_enable_sclk_control(struct radeon_device *rdev, bool enable)
 {
 	if (enable)
-		WREG32_P(GENERAL_PWRMGT, 0, ~SCLK_PWRMGT_OFF);
+		WREG32_P(SCLK_PWRMGT_CNTL, 0, ~SCLK_PWRMGT_OFF);
 	else
-		WREG32_P(GENERAL_PWRMGT, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF);
+		WREG32_P(SCLK_PWRMGT_CNTL, SCLK_PWRMGT_OFF, ~SCLK_PWRMGT_OFF);
 }
 
 void r600_enable_mclk_control(struct radeon_device *rdev, bool enable)
@@ -745,6 +763,8 @@
 	case THERMAL_TYPE_SUMO:
 	case THERMAL_TYPE_NI:
 	case THERMAL_TYPE_SI:
+	case THERMAL_TYPE_CI:
+	case THERMAL_TYPE_KV:
 		return true;
 	case THERMAL_TYPE_ADT7473_WITH_INTERNAL:
 	case THERMAL_TYPE_EMC2103_WITH_INTERNAL:
@@ -779,15 +799,19 @@
 	u32 size = atom_table->ucNumEntries *
 		sizeof(struct radeon_clock_voltage_dependency_entry);
 	int i;
+	ATOM_PPLIB_Clock_Voltage_Dependency_Record *entry;
 
 	radeon_table->entries = kzalloc(size, GFP_KERNEL);
 	if (!radeon_table->entries)
 		return -ENOMEM;
 
+	entry = &atom_table->entries[0];
 	for (i = 0; i < atom_table->ucNumEntries; i++) {
-		radeon_table->entries[i].clk = le16_to_cpu(atom_table->entries[i].usClockLow) |
-			(atom_table->entries[i].ucClockHigh << 16);
-		radeon_table->entries[i].v = le16_to_cpu(atom_table->entries[i].usVoltage);
+		radeon_table->entries[i].clk = le16_to_cpu(entry->usClockLow) |
+			(entry->ucClockHigh << 16);
+		radeon_table->entries[i].v = le16_to_cpu(entry->usVoltage);
+		entry = (ATOM_PPLIB_Clock_Voltage_Dependency_Record *)
+			((u8 *)entry + sizeof(ATOM_PPLIB_Clock_Voltage_Dependency_Record));
 	}
 	radeon_table->count = atom_table->ucNumEntries;
 
@@ -875,6 +899,19 @@
 				return ret;
 			}
 		}
+		if (power_info->pplib4.usMvddDependencyOnMCLKOffset) {
+			dep_table = (ATOM_PPLIB_Clock_Voltage_Dependency_Table *)
+				(mode_info->atom_context->bios + data_offset +
+				 le16_to_cpu(power_info->pplib4.usMvddDependencyOnMCLKOffset));
+			ret = r600_parse_clk_voltage_dep_table(&rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk,
+							       dep_table);
+			if (ret) {
+				kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries);
+				kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries);
+				kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries);
+				return ret;
+			}
+		}
 		if (power_info->pplib4.usMaxClockVoltageOnDCOffset) {
 			ATOM_PPLIB_Clock_Voltage_Limit_Table *clk_v =
 				(ATOM_PPLIB_Clock_Voltage_Limit_Table *)
@@ -898,27 +935,27 @@
 				(ATOM_PPLIB_PhaseSheddingLimits_Table *)
 				(mode_info->atom_context->bios + data_offset +
 				 le16_to_cpu(power_info->pplib4.usVddcPhaseShedLimitsTableOffset));
+			ATOM_PPLIB_PhaseSheddingLimits_Record *entry;
 
 			rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries =
 				kzalloc(psl->ucNumEntries *
 					sizeof(struct radeon_phase_shedding_limits_entry),
 					GFP_KERNEL);
 			if (!rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries) {
-				kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries);
-				kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries);
-				kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries);
+				r600_free_extended_power_table(rdev);
 				return -ENOMEM;
 			}
 
+			entry = &psl->entries[0];
 			for (i = 0; i < psl->ucNumEntries; i++) {
 				rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries[i].sclk =
-					le16_to_cpu(psl->entries[i].usSclkLow) |
-					(psl->entries[i].ucSclkHigh << 16);
+					le16_to_cpu(entry->usSclkLow) | (entry->ucSclkHigh << 16);
 				rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries[i].mclk =
-					le16_to_cpu(psl->entries[i].usMclkLow) |
-					(psl->entries[i].ucMclkHigh << 16);
+					le16_to_cpu(entry->usMclkLow) | (entry->ucMclkHigh << 16);
 				rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries[i].voltage =
-					le16_to_cpu(psl->entries[i].usVoltage);
+					le16_to_cpu(entry->usVoltage);
+				entry = (ATOM_PPLIB_PhaseSheddingLimits_Record *)
+					((u8 *)entry + sizeof(ATOM_PPLIB_PhaseSheddingLimits_Record));
 			}
 			rdev->pm.dpm.dyn_state.phase_shedding_limits_table.count =
 				psl->ucNumEntries;
@@ -945,30 +982,140 @@
 				(ATOM_PPLIB_CAC_Leakage_Table *)
 				(mode_info->atom_context->bios + data_offset +
 				 le16_to_cpu(power_info->pplib5.usCACLeakageTableOffset));
+			ATOM_PPLIB_CAC_Leakage_Record *entry;
 			u32 size = cac_table->ucNumEntries * sizeof(struct radeon_cac_leakage_table);
 			rdev->pm.dpm.dyn_state.cac_leakage_table.entries = kzalloc(size, GFP_KERNEL);
 			if (!rdev->pm.dpm.dyn_state.cac_leakage_table.entries) {
-				kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries);
-				kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries);
-				kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries);
+				r600_free_extended_power_table(rdev);
 				return -ENOMEM;
 			}
+			entry = &cac_table->entries[0];
 			for (i = 0; i < cac_table->ucNumEntries; i++) {
-				rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc =
-					le16_to_cpu(cac_table->entries[i].usVddc);
-				rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].leakage =
-					le32_to_cpu(cac_table->entries[i].ulLeakageValue);
+				if (rdev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_EVV) {
+					rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc1 =
+						le16_to_cpu(entry->usVddc1);
+					rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc2 =
+						le16_to_cpu(entry->usVddc2);
+					rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc3 =
+						le16_to_cpu(entry->usVddc3);
+				} else {
+					rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].vddc =
+						le16_to_cpu(entry->usVddc);
+					rdev->pm.dpm.dyn_state.cac_leakage_table.entries[i].leakage =
+						le32_to_cpu(entry->ulLeakageValue);
+				}
+				entry = (ATOM_PPLIB_CAC_Leakage_Record *)
+					((u8 *)entry + sizeof(ATOM_PPLIB_CAC_Leakage_Record));
 			}
 			rdev->pm.dpm.dyn_state.cac_leakage_table.count = cac_table->ucNumEntries;
 		}
 	}
 
-	/* ppm table */
+	/* ext tables */
 	if (le16_to_cpu(power_info->pplib.usTableSize) >=
 	    sizeof(struct _ATOM_PPLIB_POWERPLAYTABLE3)) {
 		ATOM_PPLIB_EXTENDEDHEADER *ext_hdr = (ATOM_PPLIB_EXTENDEDHEADER *)
 			(mode_info->atom_context->bios + data_offset +
 			 le16_to_cpu(power_info->pplib3.usExtendendedHeaderOffset));
+		if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V2) &&
+			ext_hdr->usVCETableOffset) {
+			VCEClockInfoArray *array = (VCEClockInfoArray *)
+				(mode_info->atom_context->bios + data_offset +
+                                 le16_to_cpu(ext_hdr->usVCETableOffset) + 1);
+			ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table *limits =
+				(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Table *)
+				(mode_info->atom_context->bios + data_offset +
+				 le16_to_cpu(ext_hdr->usVCETableOffset) + 1 +
+				 1 + array->ucNumEntries * sizeof(VCEClockInfo));
+			ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *entry;
+			u32 size = limits->numEntries *
+				sizeof(struct radeon_vce_clock_voltage_dependency_entry);
+			rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries =
+				kzalloc(size, GFP_KERNEL);
+			if (!rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries) {
+				r600_free_extended_power_table(rdev);
+				return -ENOMEM;
+			}
+			rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count =
+				limits->numEntries;
+			entry = &limits->entries[0];
+			for (i = 0; i < limits->numEntries; i++) {
+				VCEClockInfo *vce_clk = (VCEClockInfo *)
+					((u8 *)&array->entries[0] +
+					 (entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo)));
+				rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].evclk =
+					le16_to_cpu(vce_clk->usEVClkLow) | (vce_clk->ucEVClkHigh << 16);
+				rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].ecclk =
+					le16_to_cpu(vce_clk->usECClkLow) | (vce_clk->ucECClkHigh << 16);
+				rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].v =
+					le16_to_cpu(entry->usVoltage);
+				entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *)
+					((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record));
+			}
+		}
+		if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3) &&
+			ext_hdr->usUVDTableOffset) {
+			UVDClockInfoArray *array = (UVDClockInfoArray *)
+				(mode_info->atom_context->bios + data_offset +
+				 le16_to_cpu(ext_hdr->usUVDTableOffset) + 1);
+			ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table *limits =
+				(ATOM_PPLIB_UVD_Clock_Voltage_Limit_Table *)
+				(mode_info->atom_context->bios + data_offset +
+				 le16_to_cpu(ext_hdr->usUVDTableOffset) + 1 +
+				 1 + (array->ucNumEntries * sizeof (UVDClockInfo)));
+			ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record *entry;
+			u32 size = limits->numEntries *
+				sizeof(struct radeon_uvd_clock_voltage_dependency_entry);
+			rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries =
+				kzalloc(size, GFP_KERNEL);
+			if (!rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries) {
+				r600_free_extended_power_table(rdev);
+				return -ENOMEM;
+			}
+			rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.count =
+				limits->numEntries;
+			entry = &limits->entries[0];
+			for (i = 0; i < limits->numEntries; i++) {
+				UVDClockInfo *uvd_clk = (UVDClockInfo *)
+					((u8 *)&array->entries[0] +
+					 (entry->ucUVDClockInfoIndex * sizeof(UVDClockInfo)));
+				rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].vclk =
+					le16_to_cpu(uvd_clk->usVClkLow) | (uvd_clk->ucVClkHigh << 16);
+				rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].dclk =
+					le16_to_cpu(uvd_clk->usDClkLow) | (uvd_clk->ucDClkHigh << 16);
+				rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries[i].v =
+					le16_to_cpu(limits->entries[i].usVoltage);
+				entry = (ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record *)
+					((u8 *)entry + sizeof(ATOM_PPLIB_UVD_Clock_Voltage_Limit_Record));
+			}
+		}
+		if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V4) &&
+			ext_hdr->usSAMUTableOffset) {
+			ATOM_PPLIB_SAMClk_Voltage_Limit_Table *limits =
+				(ATOM_PPLIB_SAMClk_Voltage_Limit_Table *)
+				(mode_info->atom_context->bios + data_offset +
+				 le16_to_cpu(ext_hdr->usSAMUTableOffset) + 1);
+			ATOM_PPLIB_SAMClk_Voltage_Limit_Record *entry;
+			u32 size = limits->numEntries *
+				sizeof(struct radeon_clock_voltage_dependency_entry);
+			rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries =
+				kzalloc(size, GFP_KERNEL);
+			if (!rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries) {
+				r600_free_extended_power_table(rdev);
+				return -ENOMEM;
+			}
+			rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.count =
+				limits->numEntries;
+			entry = &limits->entries[0];
+			for (i = 0; i < limits->numEntries; i++) {
+				rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[i].clk =
+					le16_to_cpu(entry->usSAMClockLow) | (entry->ucSAMClockHigh << 16);
+				rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries[i].v =
+					le16_to_cpu(entry->usVoltage);
+				entry = (ATOM_PPLIB_SAMClk_Voltage_Limit_Record *)
+					((u8 *)entry + sizeof(ATOM_PPLIB_SAMClk_Voltage_Limit_Record));
+			}
+		}
 		if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V5) &&
 		    ext_hdr->usPPMTableOffset) {
 			ATOM_PPLIB_PPM_Table *ppm = (ATOM_PPLIB_PPM_Table *)
@@ -977,10 +1124,7 @@
 			rdev->pm.dpm.dyn_state.ppm_table =
 				kzalloc(sizeof(struct radeon_ppm_table), GFP_KERNEL);
 			if (!rdev->pm.dpm.dyn_state.ppm_table) {
-				kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_sclk.entries);
-				kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries);
-				kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries);
-				kfree(rdev->pm.dpm.dyn_state.cac_leakage_table.entries);
+				r600_free_extended_power_table(rdev);
 				return -ENOMEM;
 			}
 			rdev->pm.dpm.dyn_state.ppm_table->ppm_design = ppm->ucPpmDesign;
@@ -1003,6 +1147,71 @@
 			rdev->pm.dpm.dyn_state.ppm_table->tj_max =
 				le32_to_cpu(ppm->ulTjmax);
 		}
+		if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V6) &&
+			ext_hdr->usACPTableOffset) {
+			ATOM_PPLIB_ACPClk_Voltage_Limit_Table *limits =
+				(ATOM_PPLIB_ACPClk_Voltage_Limit_Table *)
+				(mode_info->atom_context->bios + data_offset +
+				 le16_to_cpu(ext_hdr->usACPTableOffset) + 1);
+			ATOM_PPLIB_ACPClk_Voltage_Limit_Record *entry;
+			u32 size = limits->numEntries *
+				sizeof(struct radeon_clock_voltage_dependency_entry);
+			rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries =
+				kzalloc(size, GFP_KERNEL);
+			if (!rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries) {
+				r600_free_extended_power_table(rdev);
+				return -ENOMEM;
+			}
+			rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.count =
+				limits->numEntries;
+			entry = &limits->entries[0];
+			for (i = 0; i < limits->numEntries; i++) {
+				rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[i].clk =
+					le16_to_cpu(entry->usACPClockLow) | (entry->ucACPClockHigh << 16);
+				rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries[i].v =
+					le16_to_cpu(entry->usVoltage);
+				entry = (ATOM_PPLIB_ACPClk_Voltage_Limit_Record *)
+					((u8 *)entry + sizeof(ATOM_PPLIB_ACPClk_Voltage_Limit_Record));
+			}
+		}
+		if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V7) &&
+			ext_hdr->usPowerTuneTableOffset) {
+			u8 rev = *(u8 *)(mode_info->atom_context->bios + data_offset +
+					 le16_to_cpu(ext_hdr->usPowerTuneTableOffset));
+			ATOM_PowerTune_Table *pt;
+			rdev->pm.dpm.dyn_state.cac_tdp_table =
+				kzalloc(sizeof(struct radeon_cac_tdp_table), GFP_KERNEL);
+			if (!rdev->pm.dpm.dyn_state.cac_tdp_table) {
+				r600_free_extended_power_table(rdev);
+				return -ENOMEM;
+			}
+			if (rev > 0) {
+				ATOM_PPLIB_POWERTUNE_Table_V1 *ppt = (ATOM_PPLIB_POWERTUNE_Table_V1 *)
+					(mode_info->atom_context->bios + data_offset +
+					 le16_to_cpu(ext_hdr->usPowerTuneTableOffset));
+				rdev->pm.dpm.dyn_state.cac_tdp_table->maximum_power_delivery_limit =
+					ppt->usMaximumPowerDeliveryLimit;
+				pt = &ppt->power_tune_table;
+			} else {
+				ATOM_PPLIB_POWERTUNE_Table *ppt = (ATOM_PPLIB_POWERTUNE_Table *)
+					(mode_info->atom_context->bios + data_offset +
+					 le16_to_cpu(ext_hdr->usPowerTuneTableOffset));
+				rdev->pm.dpm.dyn_state.cac_tdp_table->maximum_power_delivery_limit = 255;
+				pt = &ppt->power_tune_table;
+			}
+			rdev->pm.dpm.dyn_state.cac_tdp_table->tdp = le16_to_cpu(pt->usTDP);
+			rdev->pm.dpm.dyn_state.cac_tdp_table->configurable_tdp =
+				le16_to_cpu(pt->usConfigurableTDP);
+			rdev->pm.dpm.dyn_state.cac_tdp_table->tdc = le16_to_cpu(pt->usTDC);
+			rdev->pm.dpm.dyn_state.cac_tdp_table->battery_power_limit =
+				le16_to_cpu(pt->usBatteryPowerLimit);
+			rdev->pm.dpm.dyn_state.cac_tdp_table->small_power_limit =
+				le16_to_cpu(pt->usSmallPowerLimit);
+			rdev->pm.dpm.dyn_state.cac_tdp_table->low_cac_leakage =
+				le16_to_cpu(pt->usLowCACLeakage);
+			rdev->pm.dpm.dyn_state.cac_tdp_table->high_cac_leakage =
+				le16_to_cpu(pt->usHighCACLeakage);
+		}
 	}
 
 	return 0;
@@ -1016,12 +1225,24 @@
 		kfree(rdev->pm.dpm.dyn_state.vddci_dependency_on_mclk.entries);
 	if (rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries)
 		kfree(rdev->pm.dpm.dyn_state.vddc_dependency_on_mclk.entries);
+	if (rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries)
+		kfree(rdev->pm.dpm.dyn_state.mvdd_dependency_on_mclk.entries);
 	if (rdev->pm.dpm.dyn_state.cac_leakage_table.entries)
 		kfree(rdev->pm.dpm.dyn_state.cac_leakage_table.entries);
 	if (rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries)
 		kfree(rdev->pm.dpm.dyn_state.phase_shedding_limits_table.entries);
 	if (rdev->pm.dpm.dyn_state.ppm_table)
 		kfree(rdev->pm.dpm.dyn_state.ppm_table);
+	if (rdev->pm.dpm.dyn_state.cac_tdp_table)
+		kfree(rdev->pm.dpm.dyn_state.cac_tdp_table);
+	if (rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries)
+		kfree(rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries);
+	if (rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries)
+		kfree(rdev->pm.dpm.dyn_state.uvd_clock_voltage_dependency_table.entries);
+	if (rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries)
+		kfree(rdev->pm.dpm.dyn_state.samu_clock_voltage_dependency_table.entries);
+	if (rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries)
+		kfree(rdev->pm.dpm.dyn_state.acp_clock_voltage_dependency_table.entries);
 }
 
 enum radeon_pcie_gen r600_get_pcie_gen_support(struct radeon_device *rdev,
@@ -1046,3 +1267,36 @@
 	}
 	return RADEON_PCIE_GEN1;
 }
+
+u16 r600_get_pcie_lane_support(struct radeon_device *rdev,
+			       u16 asic_lanes,
+			       u16 default_lanes)
+{
+	switch (asic_lanes) {
+	case 0:
+	default:
+		return default_lanes;
+	case 1:
+		return 1;
+	case 2:
+		return 2;
+	case 4:
+		return 4;
+	case 8:
+		return 8;
+	case 12:
+		return 12;
+	case 16:
+		return 16;
+	}
+}
+
+u8 r600_encode_pci_lane_width(u32 lanes)
+{
+	u8 encoded_lanes[] = { 0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6 };
+
+	if (lanes > 16)
+		return 0;
+
+	return encoded_lanes[lanes];
+}
diff --git a/drivers/gpu/drm/radeon/r600_dpm.h b/drivers/gpu/drm/radeon/r600_dpm.h
index 7c822d9..1000bf9 100644
--- a/drivers/gpu/drm/radeon/r600_dpm.h
+++ b/drivers/gpu/drm/radeon/r600_dpm.h
@@ -130,6 +130,7 @@
 void r600_dpm_print_ps_status(struct radeon_device *rdev,
 			      struct radeon_ps *rps);
 u32 r600_dpm_get_vblank_time(struct radeon_device *rdev);
+u32 r600_dpm_get_vrefresh(struct radeon_device *rdev);
 bool r600_is_uvd_state(u32 class, u32 class2);
 void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
 			    u32 *p, u32 *u);
@@ -224,4 +225,9 @@
 					       enum radeon_pcie_gen asic_gen,
 					       enum radeon_pcie_gen default_gen);
 
+u16 r600_get_pcie_lane_support(struct radeon_device *rdev,
+			       u16 asic_lanes,
+			       u16 default_lanes);
+u8 r600_encode_pci_lane_width(u32 lanes);
+
 #endif
diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c
index f48240b..f443010 100644
--- a/drivers/gpu/drm/radeon/r600_hdmi.c
+++ b/drivers/gpu/drm/radeon/r600_hdmi.c
@@ -226,10 +226,29 @@
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 	u32 base_rate = 24000;
+	u32 max_ratio = clock / base_rate;
+	u32 dto_phase;
+	u32 dto_modulo = clock;
+	u32 wallclock_ratio;
+	u32 dto_cntl;
 
 	if (!dig || !dig->afmt)
 		return;
 
+	if (max_ratio >= 8) {
+		dto_phase = 192 * 1000;
+		wallclock_ratio = 3;
+	} else if (max_ratio >= 4) {
+		dto_phase = 96 * 1000;
+		wallclock_ratio = 2;
+	} else if (max_ratio >= 2) {
+		dto_phase = 48 * 1000;
+		wallclock_ratio = 1;
+	} else {
+		dto_phase = 24 * 1000;
+		wallclock_ratio = 0;
+	}
+
 	/* there are two DTOs selected by DCCG_AUDIO_DTO_SELECT.
 	 * doesn't matter which one you use.  Just use the first one.
 	 */
@@ -242,9 +261,21 @@
 		/* according to the reg specs, this should DCE3.2 only, but in
 		 * practice it seems to cover DCE3.0 as well.
 		 */
-		WREG32(DCCG_AUDIO_DTO0_PHASE, base_rate * 100);
-		WREG32(DCCG_AUDIO_DTO0_MODULE, clock * 100);
-		WREG32(DCCG_AUDIO_DTO_SELECT, 0); /* select DTO0 */
+		if (dig->dig_encoder == 0) {
+			dto_cntl = RREG32(DCCG_AUDIO_DTO0_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK;
+			dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio);
+			WREG32(DCCG_AUDIO_DTO0_CNTL, dto_cntl);
+			WREG32(DCCG_AUDIO_DTO0_PHASE, dto_phase);
+			WREG32(DCCG_AUDIO_DTO0_MODULE, dto_modulo);
+			WREG32(DCCG_AUDIO_DTO_SELECT, 0); /* select DTO0 */
+		} else {
+			dto_cntl = RREG32(DCCG_AUDIO_DTO1_CNTL) & ~DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK;
+			dto_cntl |= DCCG_AUDIO_DTO_WALLCLOCK_RATIO(wallclock_ratio);
+			WREG32(DCCG_AUDIO_DTO1_CNTL, dto_cntl);
+			WREG32(DCCG_AUDIO_DTO1_PHASE, dto_phase);
+			WREG32(DCCG_AUDIO_DTO1_MODULE, dto_modulo);
+			WREG32(DCCG_AUDIO_DTO_SELECT, 1); /* select DTO1 */
+		}
 	} else {
 		/* according to the reg specs, this should be DCE2.0 and DCE3.0 */
 		WREG32(AUDIO_DTO, AUDIO_DTO_PHASE(base_rate / 10) |
@@ -252,6 +283,107 @@
 	}
 }
 
+static void dce3_2_afmt_write_speaker_allocation(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector = NULL;
+	u32 tmp;
+	u8 *sadb;
+	int sad_count;
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder)
+			radeon_connector = to_radeon_connector(connector);
+	}
+
+	if (!radeon_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_speaker_allocation(radeon_connector->edid, &sadb);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count);
+		return;
+	}
+
+	/* program the speaker allocation */
+	tmp = RREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER);
+	tmp &= ~(DP_CONNECTION | SPEAKER_ALLOCATION_MASK);
+	/* set HDMI mode */
+	tmp |= HDMI_CONNECTION;
+	if (sad_count)
+		tmp |= SPEAKER_ALLOCATION(sadb[0]);
+	else
+		tmp |= SPEAKER_ALLOCATION(5); /* stereo */
+	WREG32(AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER, tmp);
+
+	kfree(sadb);
+}
+
+static void dce3_2_afmt_write_sad_regs(struct drm_encoder *encoder)
+{
+	struct radeon_device *rdev = encoder->dev->dev_private;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector = NULL;
+	struct cea_sad *sads;
+	int i, sad_count;
+
+	static const u16 eld_reg_to_type[][2] = {
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP },
+		{ AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO },
+	};
+
+	list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder)
+			radeon_connector = to_radeon_connector(connector);
+	}
+
+	if (!radeon_connector) {
+		DRM_ERROR("Couldn't find encoder's connector\n");
+		return;
+	}
+
+	sad_count = drm_edid_to_sad(radeon_connector->edid, &sads);
+	if (sad_count < 0) {
+		DRM_ERROR("Couldn't read SADs: %d\n", sad_count);
+		return;
+	}
+	BUG_ON(!sads);
+
+	for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) {
+		u32 value = 0;
+		int j;
+
+		for (j = 0; j < sad_count; j++) {
+			struct cea_sad *sad = &sads[j];
+
+			if (sad->format == eld_reg_to_type[i][1]) {
+				value = MAX_CHANNELS(sad->channels) |
+					DESCRIPTOR_BYTE_2(sad->byte2) |
+					SUPPORTED_FREQUENCIES(sad->freq);
+				if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM)
+					value |= SUPPORTED_FREQUENCIES_STEREO(sad->freq);
+				break;
+			}
+		}
+		WREG32(eld_reg_to_type[i][0], value);
+	}
+
+	kfree(sads);
+}
+
 /*
  * update the info frames with the data from the current display mode
  */
@@ -296,6 +428,11 @@
 		       HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */
 	}
 
+	if (ASIC_IS_DCE32(rdev)) {
+		dce3_2_afmt_write_speaker_allocation(encoder);
+		dce3_2_afmt_write_sad_regs(encoder);
+	}
+
 	WREG32(HDMI0_ACR_PACKET_CONTROL + offset,
 	       HDMI0_ACR_AUTO_SEND | /* allow hw to sent ACR packets when required */
 	       HDMI0_ACR_SOURCE); /* select SW CTS value */
@@ -351,7 +488,7 @@
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
-	struct r600_audio audio = r600_audio_status(rdev);
+	struct r600_audio_pin audio = r600_audio_status(rdev);
 	uint8_t buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AUDIO_INFOFRAME_SIZE];
 	struct hdmi_audio_infoframe frame;
 	uint32_t offset;
@@ -460,6 +597,11 @@
 	if (!enable && !dig->afmt->enabled)
 		return;
 
+	if (enable)
+		dig->afmt->pin = r600_audio_get_pin(rdev);
+	else
+		dig->afmt->pin = NULL;
+
 	/* Older chipsets require setting HDMI and routing manually */
 	if (!ASIC_IS_DCE3(rdev)) {
 		if (enable)
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 8e3fe81..454f90a 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -933,6 +933,9 @@
 #define DCCG_AUDIO_DTO0_LOAD              0x051c
 #       define DTO_LOAD                   (1 << 31)
 #define DCCG_AUDIO_DTO0_CNTL              0x0520
+#       define DCCG_AUDIO_DTO_WALLCLOCK_RATIO(x) (((x) & 7) << 0)
+#       define DCCG_AUDIO_DTO_WALLCLOCK_RATIO_MASK 7
+#       define DCCG_AUDIO_DTO_WALLCLOCK_RATIO_SHIFT 0
 
 #define DCCG_AUDIO_DTO1_PHASE             0x0524
 #define DCCG_AUDIO_DTO1_MODULE            0x0528
@@ -957,6 +960,42 @@
 #       define DIG_MODE_SDVO             4
 #define DIG1_CNTL                        0x79a0
 
+#define AZ_F0_CODEC_PIN0_CONTROL_CHANNEL_SPEAKER          0x71bc
+#define		SPEAKER_ALLOCATION(x)			(((x) & 0x7f) << 0)
+#define		SPEAKER_ALLOCATION_MASK			(0x7f << 0)
+#define		SPEAKER_ALLOCATION_SHIFT		0
+#define		HDMI_CONNECTION				(1 << 16)
+#define		DP_CONNECTION				(1 << 17)
+
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR0        0x71c8 /* LPCM */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR1        0x71cc /* AC3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR2        0x71d0 /* MPEG1 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR3        0x71d4 /* MP3 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR4        0x71d8 /* MPEG2 */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR5        0x71dc /* AAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR6        0x71e0 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR7        0x71e4 /* ATRAC */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR8        0x71e8 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR9        0x71ec /* Dolby Digital */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR10       0x71f0 /* DTS-HD */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR11       0x71f4 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR12       0x71f8 /* DTS */
+#define AZ_F0_CODEC_PIN0_CONTROL_AUDIO_DESCRIPTOR13       0x71fc /* WMA Pro */
+#       define MAX_CHANNELS(x)                            (((x) & 0x7) << 0)
+/* max channels minus one.  7 = 8 channels */
+#       define SUPPORTED_FREQUENCIES(x)                   (((x) & 0xff) << 8)
+#       define DESCRIPTOR_BYTE_2(x)                       (((x) & 0xff) << 16)
+#       define SUPPORTED_FREQUENCIES_STEREO(x)            (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+
 /* rs6xx/rs740 and r6xx share the same HDMI blocks, however, rs6xx has only one
  * instance of the blocks while r6xx has 2.  DCE 3.0 cards are slightly
  * different due to the new DIG blocks, but also have 2 instances.
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 2f08219..ff8b564 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -152,6 +152,47 @@
 #define RADEON_RESET_MC				(1 << 10)
 #define RADEON_RESET_DISPLAY			(1 << 11)
 
+/* CG block flags */
+#define RADEON_CG_BLOCK_GFX			(1 << 0)
+#define RADEON_CG_BLOCK_MC			(1 << 1)
+#define RADEON_CG_BLOCK_SDMA			(1 << 2)
+#define RADEON_CG_BLOCK_UVD			(1 << 3)
+#define RADEON_CG_BLOCK_VCE			(1 << 4)
+#define RADEON_CG_BLOCK_HDP			(1 << 5)
+#define RADEON_CG_BLOCK_BIF			(1 << 6)
+
+/* CG flags */
+#define RADEON_CG_SUPPORT_GFX_MGCG		(1 << 0)
+#define RADEON_CG_SUPPORT_GFX_MGLS		(1 << 1)
+#define RADEON_CG_SUPPORT_GFX_CGCG		(1 << 2)
+#define RADEON_CG_SUPPORT_GFX_CGLS		(1 << 3)
+#define RADEON_CG_SUPPORT_GFX_CGTS		(1 << 4)
+#define RADEON_CG_SUPPORT_GFX_CGTS_LS		(1 << 5)
+#define RADEON_CG_SUPPORT_GFX_CP_LS		(1 << 6)
+#define RADEON_CG_SUPPORT_GFX_RLC_LS		(1 << 7)
+#define RADEON_CG_SUPPORT_MC_LS			(1 << 8)
+#define RADEON_CG_SUPPORT_MC_MGCG		(1 << 9)
+#define RADEON_CG_SUPPORT_SDMA_LS		(1 << 10)
+#define RADEON_CG_SUPPORT_SDMA_MGCG		(1 << 11)
+#define RADEON_CG_SUPPORT_BIF_LS		(1 << 12)
+#define RADEON_CG_SUPPORT_UVD_MGCG		(1 << 13)
+#define RADEON_CG_SUPPORT_VCE_MGCG		(1 << 14)
+#define RADEON_CG_SUPPORT_HDP_LS		(1 << 15)
+#define RADEON_CG_SUPPORT_HDP_MGCG		(1 << 16)
+
+/* PG flags */
+#define RADEON_PG_SUPPORT_GFX_CG		(1 << 0)
+#define RADEON_PG_SUPPORT_GFX_SMG		(1 << 1)
+#define RADEON_PG_SUPPORT_GFX_DMG		(1 << 2)
+#define RADEON_PG_SUPPORT_UVD			(1 << 3)
+#define RADEON_PG_SUPPORT_VCE			(1 << 4)
+#define RADEON_PG_SUPPORT_CP			(1 << 5)
+#define RADEON_PG_SUPPORT_GDS			(1 << 6)
+#define RADEON_PG_SUPPORT_RLC_SMU_HS		(1 << 7)
+#define RADEON_PG_SUPPORT_SDMA			(1 << 8)
+#define RADEON_PG_SUPPORT_ACP			(1 << 9)
+#define RADEON_PG_SUPPORT_SAMU			(1 << 10)
+
 /* max cursor sizes (in pixels) */
 #define CURSOR_WIDTH 64
 #define CURSOR_HEIGHT 64
@@ -238,6 +279,12 @@
 int radeon_atom_get_leakage_vddc_based_on_leakage_idx(struct radeon_device *rdev,
 						      u16 *voltage,
 						      u16 leakage_idx);
+int radeon_atom_get_leakage_id_from_vbios(struct radeon_device *rdev,
+					  u16 *leakage_id);
+int radeon_atom_get_leakage_vddc_based_on_leakage_params(struct radeon_device *rdev,
+							 u16 *vddc, u16 *vddci,
+							 u16 virtual_voltage_id,
+							 u16 vbios_voltage_id);
 int radeon_atom_round_to_true_voltage(struct radeon_device *rdev,
 				      u8 voltage_type,
 				      u16 nominal_voltage,
@@ -492,9 +539,6 @@
 int radeon_mode_dumb_mmap(struct drm_file *filp,
 			  struct drm_device *dev,
 			  uint32_t handle, uint64_t *offset_p);
-int radeon_mode_dumb_destroy(struct drm_file *file_priv,
-			     struct drm_device *dev,
-			     uint32_t handle);
 
 /*
  * Semaphores.
@@ -682,7 +726,7 @@
 
 #define RADEON_MAX_HPD_PINS 6
 #define RADEON_MAX_CRTCS 6
-#define RADEON_MAX_AFMT_BLOCKS 6
+#define RADEON_MAX_AFMT_BLOCKS 7
 
 struct radeon_irq {
 	bool				installed;
@@ -746,8 +790,6 @@
 	uint32_t		align_mask;
 	uint32_t		ptr_mask;
 	bool			ready;
-	u32			ptr_reg_shift;
-	u32			ptr_reg_mask;
 	u32			nop;
 	u32			idx;
 	u64			last_semaphore_signal_addr;
@@ -844,35 +886,6 @@
 	bool                    enabled;
 };
 
-struct r600_blit_cp_primitives {
-	void (*set_render_target)(struct radeon_device *rdev, int format,
-				  int w, int h, u64 gpu_addr);
-	void (*cp_set_surface_sync)(struct radeon_device *rdev,
-				    u32 sync_type, u32 size,
-				    u64 mc_addr);
-	void (*set_shaders)(struct radeon_device *rdev);
-	void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr);
-	void (*set_tex_resource)(struct radeon_device *rdev,
-				 int format, int w, int h, int pitch,
-				 u64 gpu_addr, u32 size);
-	void (*set_scissors)(struct radeon_device *rdev, int x1, int y1,
-			     int x2, int y2);
-	void (*draw_auto)(struct radeon_device *rdev);
-	void (*set_default_state)(struct radeon_device *rdev);
-};
-
-struct r600_blit {
-	struct radeon_bo	*shader_obj;
-	struct r600_blit_cp_primitives primitives;
-	int max_dim;
-	int ring_size_common;
-	int ring_size_per_loop;
-	u64 shader_gpu_addr;
-	u32 vs_offset, ps_offset;
-	u32 state_offset;
-	u32 state_len;
-};
-
 /*
  * RLC stuff
  */
@@ -883,13 +896,19 @@
 	struct radeon_bo	*save_restore_obj;
 	uint64_t		save_restore_gpu_addr;
 	volatile uint32_t	*sr_ptr;
-	u32                     *reg_list;
+	const u32               *reg_list;
 	u32                     reg_list_size;
 	/* for clear state */
 	struct radeon_bo	*clear_state_obj;
 	uint64_t		clear_state_gpu_addr;
 	volatile uint32_t	*cs_ptr;
-	struct cs_section_def   *cs_data;
+	const struct cs_section_def   *cs_data;
+	u32                     clear_state_size;
+	/* for cp tables */
+	struct radeon_bo	*cp_table_obj;
+	uint64_t		cp_table_gpu_addr;
+	volatile uint32_t	*cp_table_ptr;
+	u32                     cp_table_size;
 };
 
 int radeon_ib_get(struct radeon_device *rdev, int ring,
@@ -921,8 +940,7 @@
 int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring,
 			unsigned size, uint32_t *data);
 int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size,
-		     unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
-		     u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop);
+		     unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop);
 void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *cp);
 
 
@@ -1036,7 +1054,6 @@
 #define R600_WB_DMA_RPTR_OFFSET   1792
 #define R600_WB_IH_WPTR_OFFSET   2048
 #define CAYMAN_WB_DMA1_RPTR_OFFSET   2304
-#define R600_WB_UVD_RPTR_OFFSET  2560
 #define R600_WB_EVENT_OFFSET     3072
 #define CIK_WB_CP1_WPTR_OFFSET     3328
 #define CIK_WB_CP2_WPTR_OFFSET     3584
@@ -1147,6 +1164,7 @@
 	THERMAL_TYPE_SI,
 	THERMAL_TYPE_EMC2103_WITH_INTERNAL,
 	THERMAL_TYPE_CI,
+	THERMAL_TYPE_KV,
 };
 
 struct radeon_voltage {
@@ -1220,6 +1238,9 @@
 	/* UVD clocks */
 	u32 vclk;
 	u32 dclk;
+	/* VCE clocks */
+	u32 evclk;
+	u32 ecclk;
 	/* asic priv */
 	void *ps_priv;
 };
@@ -1270,14 +1291,21 @@
 	struct radeon_clock_voltage_dependency_entry *entries;
 };
 
-struct radeon_cac_leakage_entry {
-	u16 vddc;
-	u32 leakage;
+union radeon_cac_leakage_entry {
+	struct {
+		u16 vddc;
+		u32 leakage;
+	};
+	struct {
+		u16 vddc1;
+		u16 vddc2;
+		u16 vddc3;
+	};
 };
 
 struct radeon_cac_leakage_table {
 	u32 count;
-	struct radeon_cac_leakage_entry *entries;
+	union radeon_cac_leakage_entry *entries;
 };
 
 struct radeon_phase_shedding_limits_entry {
@@ -1291,6 +1319,28 @@
 	struct radeon_phase_shedding_limits_entry *entries;
 };
 
+struct radeon_uvd_clock_voltage_dependency_entry {
+	u32 vclk;
+	u32 dclk;
+	u16 v;
+};
+
+struct radeon_uvd_clock_voltage_dependency_table {
+	u8 count;
+	struct radeon_uvd_clock_voltage_dependency_entry *entries;
+};
+
+struct radeon_vce_clock_voltage_dependency_entry {
+	u32 ecclk;
+	u32 evclk;
+	u16 v;
+};
+
+struct radeon_vce_clock_voltage_dependency_table {
+	u8 count;
+	struct radeon_vce_clock_voltage_dependency_entry *entries;
+};
+
 struct radeon_ppm_table {
 	u8 ppm_design;
 	u16 cpu_core_number;
@@ -1304,11 +1354,27 @@
 	u32 tj_max;
 };
 
+struct radeon_cac_tdp_table {
+	u16 tdp;
+	u16 configurable_tdp;
+	u16 tdc;
+	u16 battery_power_limit;
+	u16 small_power_limit;
+	u16 low_cac_leakage;
+	u16 high_cac_leakage;
+	u16 maximum_power_delivery_limit;
+};
+
 struct radeon_dpm_dynamic_state {
 	struct radeon_clock_voltage_dependency_table vddc_dependency_on_sclk;
 	struct radeon_clock_voltage_dependency_table vddci_dependency_on_mclk;
 	struct radeon_clock_voltage_dependency_table vddc_dependency_on_mclk;
+	struct radeon_clock_voltage_dependency_table mvdd_dependency_on_mclk;
 	struct radeon_clock_voltage_dependency_table vddc_dependency_on_dispclk;
+	struct radeon_uvd_clock_voltage_dependency_table uvd_clock_voltage_dependency_table;
+	struct radeon_vce_clock_voltage_dependency_table vce_clock_voltage_dependency_table;
+	struct radeon_clock_voltage_dependency_table samu_clock_voltage_dependency_table;
+	struct radeon_clock_voltage_dependency_table acp_clock_voltage_dependency_table;
 	struct radeon_clock_array valid_sclk_values;
 	struct radeon_clock_array valid_mclk_values;
 	struct radeon_clock_and_voltage_limits max_clock_voltage_on_dc;
@@ -1320,6 +1386,7 @@
 	struct radeon_cac_leakage_table cac_leakage_table;
 	struct radeon_phase_shedding_limits_table phase_shedding_limits_table;
 	struct radeon_ppm_table *ppm_table;
+	struct radeon_cac_tdp_table *cac_tdp_table;
 };
 
 struct radeon_dpm_fan {
@@ -1389,11 +1456,12 @@
 	struct radeon_dpm_thermal thermal;
 	/* forced levels */
 	enum radeon_dpm_forced_level forced_level;
+	/* track UVD streams */
+	unsigned sd;
+	unsigned hd;
 };
 
-void radeon_dpm_enable_power_state(struct radeon_device *rdev,
-				    enum radeon_pm_state_type dpm_state);
-
+void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable);
 
 struct radeon_pm {
 	struct mutex		mutex;
@@ -1468,9 +1536,9 @@
 	void			*cpu_addr;
 	uint64_t		gpu_addr;
 	void			*saved_bo;
-	unsigned		fw_size;
 	atomic_t		handles[RADEON_MAX_UVD_HANDLES];
 	struct drm_file		*filp[RADEON_MAX_UVD_HANDLES];
+	unsigned		img_size[RADEON_MAX_UVD_HANDLES];
 	struct delayed_work	idle_work;
 };
 
@@ -1499,12 +1567,21 @@
 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
                                 unsigned cg_upll_func_cntl);
 
-struct r600_audio {
+struct r600_audio_pin {
 	int			channels;
 	int			rate;
 	int			bits_per_sample;
 	u8			status_bits;
 	u8			category_code;
+	u32			offset;
+	bool			connected;
+	u32			id;
+};
+
+struct r600_audio {
+	bool enabled;
+	struct r600_audio_pin pin[RADEON_MAX_AFMT_BLOCKS];
+	int num_pins;
 };
 
 /*
@@ -1536,6 +1613,34 @@
 			     unsigned nfiles);
 int radeon_debugfs_fence_init(struct radeon_device *rdev);
 
+/*
+ * ASIC ring specific functions.
+ */
+struct radeon_asic_ring {
+	/* ring read/write ptr handling */
+	u32 (*get_rptr)(struct radeon_device *rdev, struct radeon_ring *ring);
+	u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
+	void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
+
+	/* validating and patching of IBs */
+	int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib);
+	int (*cs_parse)(struct radeon_cs_parser *p);
+
+	/* command emmit functions */
+	void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
+	void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
+	void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
+			       struct radeon_semaphore *semaphore, bool emit_wait);
+	void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
+
+	/* testing functions */
+	int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
+	int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
+	bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
+
+	/* deprecated */
+	void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp);
+};
 
 /*
  * ASIC specific functions.
@@ -1579,23 +1684,7 @@
 				 uint32_t incr, uint32_t flags);
 	} vm;
 	/* ring specific callbacks */
-	struct {
-		void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
-		int (*ib_parse)(struct radeon_device *rdev, struct radeon_ib *ib);
-		void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
-		void (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
-				       struct radeon_semaphore *semaphore, bool emit_wait);
-		int (*cs_parse)(struct radeon_cs_parser *p);
-		void (*ring_start)(struct radeon_device *rdev, struct radeon_ring *cp);
-		int (*ring_test)(struct radeon_device *rdev, struct radeon_ring *cp);
-		int (*ib_test)(struct radeon_device *rdev, struct radeon_ring *cp);
-		bool (*is_lockup)(struct radeon_device *rdev, struct radeon_ring *cp);
-		void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
-
-		u32 (*get_rptr)(struct radeon_device *rdev, struct radeon_ring *ring);
-		u32 (*get_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
-		void (*set_wptr)(struct radeon_device *rdev, struct radeon_ring *ring);
-	} ring[RADEON_NUM_RINGS];
+	struct radeon_asic_ring *ring[RADEON_NUM_RINGS];
 	/* irqs */
 	struct {
 		int (*set)(struct radeon_device *rdev);
@@ -1688,6 +1777,7 @@
 		void (*debugfs_print_current_performance_level)(struct radeon_device *rdev, struct seq_file *m);
 		int (*force_performance_level)(struct radeon_device *rdev, enum radeon_dpm_forced_level level);
 		bool (*vblank_too_short)(struct radeon_device *rdev);
+		void (*powergate_uvd)(struct radeon_device *rdev, bool gate);
 	} dpm;
 	/* pageflipping */
 	struct {
@@ -2066,7 +2156,7 @@
 	const struct firmware *mec_fw;	/* CIK MEC firmware */
 	const struct firmware *sdma_fw;	/* CIK SDMA firmware */
 	const struct firmware *smc_fw;	/* SMC firmware */
-	struct r600_blit r600_blit;
+	const struct firmware *uvd_fw;	/* UVD firmware */
 	struct r600_vram_scratch vram_scratch;
 	int msi_enabled; /* msi enabled */
 	struct r600_ih ih; /* r6/700 interrupt ring */
@@ -2077,9 +2167,8 @@
 	struct work_struct reset_work;
 	int num_crtc; /* number of crtcs */
 	struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
-	bool audio_enabled;
 	bool has_uvd;
-	struct r600_audio audio_status; /* audio stuff */
+	struct r600_audio audio; /* audio stuff */
 	struct notifier_block acpi_nb;
 	/* only one userspace can use Hyperz features or CMASK at a time */
 	struct drm_file *hyperz_filp;
@@ -2095,6 +2184,11 @@
 	/* ACPI interface */
 	struct radeon_atif		atif;
 	struct radeon_atcs		atcs;
+	/* srbm instance registers */
+	struct mutex			srbm_mutex;
+	/* clock, powergating flags */
+	u32 cg_flags;
+	u32 pg_flags;
 };
 
 int radeon_device_init(struct radeon_device *rdev,
@@ -2153,6 +2247,8 @@
 #define WREG32_PIF_PHY1(reg, v) eg_pif_phy1_wreg(rdev, (reg), (v))
 #define RREG32_UVD_CTX(reg) r600_uvd_ctx_rreg(rdev, (reg))
 #define WREG32_UVD_CTX(reg, v) r600_uvd_ctx_wreg(rdev, (reg), (v))
+#define RREG32_DIDT(reg) cik_didt_rreg(rdev, (reg))
+#define WREG32_DIDT(reg, v) cik_didt_wreg(rdev, (reg), (v))
 #define WREG32_P(reg, val, mask)				\
 	do {							\
 		uint32_t tmp_ = RREG32(reg);			\
@@ -2161,7 +2257,7 @@
 		WREG32(reg, tmp_);				\
 	} while (0)
 #define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
-#define WREG32_OR(reg, or) WREG32_P(reg, or, ~or)
+#define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
 #define WREG32_PLL_P(reg, val, mask)				\
 	do {							\
 		uint32_t tmp_ = RREG32_PLL(reg);		\
@@ -2284,6 +2380,22 @@
 	WREG32(R600_UVD_CTX_DATA, (v));
 }
 
+
+static inline u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
+{
+	u32 r;
+
+	WREG32(CIK_DIDT_IND_INDEX, (reg));
+	r = RREG32(CIK_DIDT_IND_DATA);
+	return r;
+}
+
+static inline void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	WREG32(CIK_DIDT_IND_INDEX, (reg));
+	WREG32(CIK_DIDT_IND_DATA, (v));
+}
+
 void r100_pll_errata_after_index(struct radeon_device *rdev);
 
 
@@ -2379,7 +2491,7 @@
 #define radeon_fini(rdev) (rdev)->asic->fini((rdev))
 #define radeon_resume(rdev) (rdev)->asic->resume((rdev))
 #define radeon_suspend(rdev) (rdev)->asic->suspend((rdev))
-#define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)].cs_parse((p))
+#define radeon_cs_parse(rdev, r, p) (rdev)->asic->ring[(r)]->cs_parse((p))
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
@@ -2387,16 +2499,16 @@
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
 #define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags)))
-#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)].ring_start((rdev), (cp))
-#define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)].ring_test((rdev), (cp))
-#define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)].ib_test((rdev), (cp))
-#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)].ib_execute((rdev), (ib))
-#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)].ib_parse((rdev), (ib))
-#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)].is_lockup((rdev), (cp))
-#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)].vm_flush((rdev), (r), (vm))
-#define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx].get_rptr((rdev), (r))
-#define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx].get_wptr((rdev), (r))
-#define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx].set_wptr((rdev), (r))
+#define radeon_ring_start(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_start((rdev), (cp))
+#define radeon_ring_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ring_test((rdev), (cp))
+#define radeon_ib_test(rdev, r, cp) (rdev)->asic->ring[(r)]->ib_test((rdev), (cp))
+#define radeon_ring_ib_execute(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_execute((rdev), (ib))
+#define radeon_ring_ib_parse(rdev, r, ib) (rdev)->asic->ring[(r)]->ib_parse((rdev), (ib))
+#define radeon_ring_is_lockup(rdev, r, cp) (rdev)->asic->ring[(r)]->is_lockup((rdev), (cp))
+#define radeon_ring_vm_flush(rdev, r, vm) (rdev)->asic->ring[(r)]->vm_flush((rdev), (r), (vm))
+#define radeon_ring_get_rptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_rptr((rdev), (r))
+#define radeon_ring_get_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->get_wptr((rdev), (r))
+#define radeon_ring_set_wptr(rdev, r) (rdev)->asic->ring[(r)->idx]->set_wptr((rdev), (r))
 #define radeon_irq_set(rdev) (rdev)->asic->irq.set((rdev))
 #define radeon_irq_process(rdev) (rdev)->asic->irq.process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->display.get_vblank_counter((rdev), (crtc))
@@ -2404,8 +2516,8 @@
 #define radeon_get_backlight_level(rdev, e) (rdev)->asic->display.get_backlight_level((e))
 #define radeon_hdmi_enable(rdev, e, b) (rdev)->asic->display.hdmi_enable((e), (b))
 #define radeon_hdmi_setmode(rdev, e, m) (rdev)->asic->display.hdmi_setmode((e), (m))
-#define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)].emit_fence((rdev), (fence))
-#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)].emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
+#define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)]->emit_fence((rdev), (fence))
+#define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)]->emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
 #define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (f))
 #define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (f))
 #define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (f))
@@ -2456,6 +2568,7 @@
 #define radeon_dpm_debugfs_print_current_performance_level(rdev, m) rdev->asic->dpm.debugfs_print_current_performance_level((rdev), (m))
 #define radeon_dpm_force_performance_level(rdev, l) rdev->asic->dpm.force_performance_level((rdev), (l))
 #define radeon_dpm_vblank_too_short(rdev) rdev->asic->dpm.vblank_too_short((rdev))
+#define radeon_dpm_powergate_uvd(rdev, g) rdev->asic->dpm.powergate_uvd((rdev), (g))
 
 /* Common functions */
 /* AGP */
@@ -2522,6 +2635,8 @@
 
 /* audio */
 void r600_audio_update_hdmi(struct work_struct *work);
+struct r600_audio_pin *r600_audio_get_pin(struct radeon_device *rdev);
+struct r600_audio_pin *dce6_audio_get_pin(struct radeon_device *rdev);
 
 /*
  * R600 vram scratch functions
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index 78bec1a..630853b 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -172,6 +172,21 @@
 /*
  * ASIC
  */
+
+static struct radeon_asic_ring r100_gfx_ring = {
+	.ib_execute = &r100_ring_ib_execute,
+	.emit_fence = &r100_fence_ring_emit,
+	.emit_semaphore = &r100_semaphore_ring_emit,
+	.cs_parse = &r100_cs_parse,
+	.ring_start = &r100_ring_start,
+	.ring_test = &r100_ring_test,
+	.ib_test = &r100_ib_test,
+	.is_lockup = &r100_gpu_is_lockup,
+	.get_rptr = &radeon_ring_generic_get_rptr,
+	.get_wptr = &radeon_ring_generic_get_wptr,
+	.set_wptr = &radeon_ring_generic_set_wptr,
+};
+
 static struct radeon_asic r100_asic = {
 	.init = &r100_init,
 	.fini = &r100_fini,
@@ -187,19 +202,7 @@
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r100_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r100_cs_parse,
-			.ring_start = &r100_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r100_gfx_ring
 	},
 	.irq = {
 		.set = &r100_irq_set,
@@ -266,19 +269,7 @@
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r100_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r100_cs_parse,
-			.ring_start = &r100_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r100_gfx_ring
 	},
 	.irq = {
 		.set = &r100_irq_set,
@@ -330,6 +321,20 @@
 	},
 };
 
+static struct radeon_asic_ring r300_gfx_ring = {
+	.ib_execute = &r100_ring_ib_execute,
+	.emit_fence = &r300_fence_ring_emit,
+	.emit_semaphore = &r100_semaphore_ring_emit,
+	.cs_parse = &r300_cs_parse,
+	.ring_start = &r300_ring_start,
+	.ring_test = &r100_ring_test,
+	.ib_test = &r100_ib_test,
+	.is_lockup = &r100_gpu_is_lockup,
+	.get_rptr = &radeon_ring_generic_get_rptr,
+	.get_wptr = &radeon_ring_generic_get_wptr,
+	.set_wptr = &radeon_ring_generic_set_wptr,
+};
+
 static struct radeon_asic r300_asic = {
 	.init = &r300_init,
 	.fini = &r300_fini,
@@ -345,19 +350,7 @@
 		.set_page = &r100_pci_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r300_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r300_cs_parse,
-			.ring_start = &r300_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
 	},
 	.irq = {
 		.set = &r100_irq_set,
@@ -424,19 +417,7 @@
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r300_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r300_cs_parse,
-			.ring_start = &r300_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
 	},
 	.irq = {
 		.set = &r100_irq_set,
@@ -503,19 +484,7 @@
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r300_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r300_cs_parse,
-			.ring_start = &r300_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
 	},
 	.irq = {
 		.set = &r100_irq_set,
@@ -582,19 +551,7 @@
 		.set_page = &rs400_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r300_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r300_cs_parse,
-			.ring_start = &r300_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
 	},
 	.irq = {
 		.set = &r100_irq_set,
@@ -661,19 +618,7 @@
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r300_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r300_cs_parse,
-			.ring_start = &r300_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
 	},
 	.irq = {
 		.set = &rs600_irq_set,
@@ -742,19 +687,7 @@
 		.set_page = &rs400_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r300_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r300_cs_parse,
-			.ring_start = &r300_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
 	},
 	.irq = {
 		.set = &rs600_irq_set,
@@ -823,19 +756,7 @@
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r300_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r300_cs_parse,
-			.ring_start = &rv515_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
 	},
 	.irq = {
 		.set = &rs600_irq_set,
@@ -902,19 +823,7 @@
 		.set_page = &rv370_pcie_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r100_ring_ib_execute,
-			.emit_fence = &r300_fence_ring_emit,
-			.emit_semaphore = &r100_semaphore_ring_emit,
-			.cs_parse = &r300_cs_parse,
-			.ring_start = &rv515_ring_start,
-			.ring_test = &r100_ring_test,
-			.ib_test = &r100_ib_test,
-			.is_lockup = &r100_gpu_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring
 	},
 	.irq = {
 		.set = &rs600_irq_set,
@@ -966,6 +875,32 @@
 	},
 };
 
+static struct radeon_asic_ring r600_gfx_ring = {
+	.ib_execute = &r600_ring_ib_execute,
+	.emit_fence = &r600_fence_ring_emit,
+	.emit_semaphore = &r600_semaphore_ring_emit,
+	.cs_parse = &r600_cs_parse,
+	.ring_test = &r600_ring_test,
+	.ib_test = &r600_ib_test,
+	.is_lockup = &r600_gfx_is_lockup,
+	.get_rptr = &radeon_ring_generic_get_rptr,
+	.get_wptr = &radeon_ring_generic_get_wptr,
+	.set_wptr = &radeon_ring_generic_set_wptr,
+};
+
+static struct radeon_asic_ring r600_dma_ring = {
+	.ib_execute = &r600_dma_ring_ib_execute,
+	.emit_fence = &r600_dma_fence_ring_emit,
+	.emit_semaphore = &r600_dma_semaphore_ring_emit,
+	.cs_parse = &r600_dma_cs_parse,
+	.ring_test = &r600_dma_ring_test,
+	.ib_test = &r600_dma_ib_test,
+	.is_lockup = &r600_dma_is_lockup,
+	.get_rptr = &r600_dma_get_rptr,
+	.get_wptr = &r600_dma_get_wptr,
+	.set_wptr = &r600_dma_set_wptr,
+};
+
 static struct radeon_asic r600_asic = {
 	.init = &r600_init,
 	.fini = &r600_fini,
@@ -983,30 +918,8 @@
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r600_ring_ib_execute,
-			.emit_fence = &r600_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &r600_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &r600_gfx_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &r600_dma_ring_ib_execute,
-			.emit_fence = &r600_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &r600_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &r600_dma_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
 	},
 	.irq = {
 		.set = &r600_irq_set,
@@ -1022,7 +935,7 @@
 		.hdmi_setmode = &r600_hdmi_setmode,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &r600_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -1078,30 +991,8 @@
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r600_ring_ib_execute,
-			.emit_fence = &r600_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &r600_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &r600_gfx_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &r600_dma_ring_ib_execute,
-			.emit_fence = &r600_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &r600_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &r600_dma_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
 	},
 	.irq = {
 		.set = &r600_irq_set,
@@ -1115,7 +1006,7 @@
 		.get_backlight_level = &atombios_get_backlight_level,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &r600_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -1161,6 +1052,7 @@
 		.get_mclk = &rv6xx_dpm_get_mclk,
 		.print_power_state = &rv6xx_dpm_print_power_state,
 		.debugfs_print_current_performance_level = &rv6xx_dpm_debugfs_print_current_performance_level,
+		.force_performance_level = &rv6xx_dpm_force_performance_level,
 	},
 	.pflip = {
 		.pre_page_flip = &rs600_pre_page_flip,
@@ -1186,30 +1078,8 @@
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r600_ring_ib_execute,
-			.emit_fence = &r600_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &r600_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &r600_gfx_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &r600_dma_ring_ib_execute,
-			.emit_fence = &r600_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &r600_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &r600_dma_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
 	},
 	.irq = {
 		.set = &r600_irq_set,
@@ -1225,7 +1095,7 @@
 		.hdmi_setmode = &r600_hdmi_setmode,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &r600_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -1279,6 +1149,19 @@
 	},
 };
 
+static struct radeon_asic_ring rv770_uvd_ring = {
+	.ib_execute = &uvd_v1_0_ib_execute,
+	.emit_fence = &uvd_v2_2_fence_emit,
+	.emit_semaphore = &uvd_v1_0_semaphore_emit,
+	.cs_parse = &radeon_uvd_cs_parse,
+	.ring_test = &uvd_v1_0_ring_test,
+	.ib_test = &uvd_v1_0_ib_test,
+	.is_lockup = &radeon_ring_test_lockup,
+	.get_rptr = &uvd_v1_0_get_rptr,
+	.get_wptr = &uvd_v1_0_get_wptr,
+	.set_wptr = &uvd_v1_0_set_wptr,
+};
+
 static struct radeon_asic rv770_asic = {
 	.init = &rv770_init,
 	.fini = &rv770_fini,
@@ -1296,42 +1179,9 @@
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &r600_ring_ib_execute,
-			.emit_fence = &r600_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &r600_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &r600_gfx_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &r600_dma_ring_ib_execute,
-			.emit_fence = &r600_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &r600_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &r600_dma_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &r600_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &rv770_uvd_ring,
 	},
 	.irq = {
 		.set = &r600_irq_set,
@@ -1347,7 +1197,7 @@
 		.hdmi_setmode = &r600_hdmi_setmode,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &rv770_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -1404,6 +1254,32 @@
 	},
 };
 
+static struct radeon_asic_ring evergreen_gfx_ring = {
+	.ib_execute = &evergreen_ring_ib_execute,
+	.emit_fence = &r600_fence_ring_emit,
+	.emit_semaphore = &r600_semaphore_ring_emit,
+	.cs_parse = &evergreen_cs_parse,
+	.ring_test = &r600_ring_test,
+	.ib_test = &r600_ib_test,
+	.is_lockup = &evergreen_gfx_is_lockup,
+	.get_rptr = &radeon_ring_generic_get_rptr,
+	.get_wptr = &radeon_ring_generic_get_wptr,
+	.set_wptr = &radeon_ring_generic_set_wptr,
+};
+
+static struct radeon_asic_ring evergreen_dma_ring = {
+	.ib_execute = &evergreen_dma_ring_ib_execute,
+	.emit_fence = &evergreen_dma_fence_ring_emit,
+	.emit_semaphore = &r600_dma_semaphore_ring_emit,
+	.cs_parse = &evergreen_dma_cs_parse,
+	.ring_test = &r600_dma_ring_test,
+	.ib_test = &r600_dma_ib_test,
+	.is_lockup = &evergreen_dma_is_lockup,
+	.get_rptr = &r600_dma_get_rptr,
+	.get_wptr = &r600_dma_get_wptr,
+	.set_wptr = &r600_dma_set_wptr,
+};
+
 static struct radeon_asic evergreen_asic = {
 	.init = &evergreen_init,
 	.fini = &evergreen_fini,
@@ -1421,42 +1297,9 @@
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &evergreen_ring_ib_execute,
-			.emit_fence = &r600_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &evergreen_gfx_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &evergreen_dma_ring_ib_execute,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &evergreen_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &evergreen_dma_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &r600_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &evergreen_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &evergreen_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &rv770_uvd_ring,
 	},
 	.irq = {
 		.set = &evergreen_irq_set,
@@ -1472,7 +1315,7 @@
 		.hdmi_setmode = &evergreen_hdmi_setmode,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &evergreen_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -1546,42 +1389,9 @@
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &evergreen_ring_ib_execute,
-			.emit_fence = &r600_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &evergreen_gfx_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &evergreen_dma_ring_ib_execute,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &evergreen_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &evergreen_dma_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &r600_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &evergreen_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &evergreen_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &rv770_uvd_ring,
 	},
 	.irq = {
 		.set = &evergreen_irq_set,
@@ -1597,7 +1407,7 @@
 		.hdmi_setmode = &evergreen_hdmi_setmode,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &evergreen_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -1670,42 +1480,9 @@
 		.set_page = &rs600_gart_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &evergreen_ring_ib_execute,
-			.emit_fence = &r600_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &evergreen_gfx_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &evergreen_dma_ring_ib_execute,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &evergreen_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &evergreen_dma_is_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &r600_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &evergreen_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &evergreen_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &rv770_uvd_ring,
 	},
 	.irq = {
 		.set = &evergreen_irq_set,
@@ -1721,7 +1498,7 @@
 		.hdmi_setmode = &evergreen_hdmi_setmode,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &evergreen_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -1778,6 +1555,49 @@
 	},
 };
 
+static struct radeon_asic_ring cayman_gfx_ring = {
+	.ib_execute = &cayman_ring_ib_execute,
+	.ib_parse = &evergreen_ib_parse,
+	.emit_fence = &cayman_fence_ring_emit,
+	.emit_semaphore = &r600_semaphore_ring_emit,
+	.cs_parse = &evergreen_cs_parse,
+	.ring_test = &r600_ring_test,
+	.ib_test = &r600_ib_test,
+	.is_lockup = &cayman_gfx_is_lockup,
+	.vm_flush = &cayman_vm_flush,
+	.get_rptr = &radeon_ring_generic_get_rptr,
+	.get_wptr = &radeon_ring_generic_get_wptr,
+	.set_wptr = &radeon_ring_generic_set_wptr,
+};
+
+static struct radeon_asic_ring cayman_dma_ring = {
+	.ib_execute = &cayman_dma_ring_ib_execute,
+	.ib_parse = &evergreen_dma_ib_parse,
+	.emit_fence = &evergreen_dma_fence_ring_emit,
+	.emit_semaphore = &r600_dma_semaphore_ring_emit,
+	.cs_parse = &evergreen_dma_cs_parse,
+	.ring_test = &r600_dma_ring_test,
+	.ib_test = &r600_dma_ib_test,
+	.is_lockup = &cayman_dma_is_lockup,
+	.vm_flush = &cayman_dma_vm_flush,
+	.get_rptr = &r600_dma_get_rptr,
+	.get_wptr = &r600_dma_get_wptr,
+	.set_wptr = &r600_dma_set_wptr
+};
+
+static struct radeon_asic_ring cayman_uvd_ring = {
+	.ib_execute = &uvd_v1_0_ib_execute,
+	.emit_fence = &uvd_v2_2_fence_emit,
+	.emit_semaphore = &uvd_v3_1_semaphore_emit,
+	.cs_parse = &radeon_uvd_cs_parse,
+	.ring_test = &uvd_v1_0_ring_test,
+	.ib_test = &uvd_v1_0_ib_test,
+	.is_lockup = &radeon_ring_test_lockup,
+	.get_rptr = &uvd_v1_0_get_rptr,
+	.get_wptr = &uvd_v1_0_get_wptr,
+	.set_wptr = &uvd_v1_0_set_wptr,
+};
+
 static struct radeon_asic cayman_asic = {
 	.init = &cayman_init,
 	.fini = &cayman_fini,
@@ -1801,88 +1621,12 @@
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &cayman_ring_ib_execute,
-			.ib_parse = &evergreen_ib_parse,
-			.emit_fence = &cayman_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &cayman_gfx_is_lockup,
-			.vm_flush = &cayman_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP1_INDEX] = {
-			.ib_execute = &cayman_ring_ib_execute,
-			.ib_parse = &evergreen_ib_parse,
-			.emit_fence = &cayman_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &cayman_gfx_is_lockup,
-			.vm_flush = &cayman_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP2_INDEX] = {
-			.ib_execute = &cayman_ring_ib_execute,
-			.ib_parse = &evergreen_ib_parse,
-			.emit_fence = &cayman_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &cayman_gfx_is_lockup,
-			.vm_flush = &cayman_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &cayman_dma_ring_ib_execute,
-			.ib_parse = &evergreen_dma_ib_parse,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &evergreen_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &cayman_dma_is_lockup,
-			.vm_flush = &cayman_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
-			.ib_execute = &cayman_dma_ring_ib_execute,
-			.ib_parse = &evergreen_dma_ib_parse,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &evergreen_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &cayman_dma_is_lockup,
-			.vm_flush = &cayman_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &cayman_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring,
+		[CAYMAN_RING_TYPE_CP1_INDEX] = &cayman_gfx_ring,
+		[CAYMAN_RING_TYPE_CP2_INDEX] = &cayman_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring,
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
 	},
 	.irq = {
 		.set = &evergreen_irq_set,
@@ -1898,7 +1642,7 @@
 		.hdmi_setmode = &evergreen_hdmi_setmode,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &evergreen_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -1978,88 +1722,12 @@
 		.set_page = &cayman_vm_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &cayman_ring_ib_execute,
-			.ib_parse = &evergreen_ib_parse,
-			.emit_fence = &cayman_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &cayman_gfx_is_lockup,
-			.vm_flush = &cayman_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP1_INDEX] = {
-			.ib_execute = &cayman_ring_ib_execute,
-			.ib_parse = &evergreen_ib_parse,
-			.emit_fence = &cayman_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &cayman_gfx_is_lockup,
-			.vm_flush = &cayman_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP2_INDEX] = {
-			.ib_execute = &cayman_ring_ib_execute,
-			.ib_parse = &evergreen_ib_parse,
-			.emit_fence = &cayman_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = &evergreen_cs_parse,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &cayman_gfx_is_lockup,
-			.vm_flush = &cayman_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &cayman_dma_ring_ib_execute,
-			.ib_parse = &evergreen_dma_ib_parse,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &evergreen_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &cayman_dma_is_lockup,
-			.vm_flush = &cayman_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
-			.ib_execute = &cayman_dma_ring_ib_execute,
-			.ib_parse = &evergreen_dma_ib_parse,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = &evergreen_dma_cs_parse,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &cayman_dma_is_lockup,
-			.vm_flush = &cayman_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &cayman_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &cayman_gfx_ring,
+		[CAYMAN_RING_TYPE_CP1_INDEX] = &cayman_gfx_ring,
+		[CAYMAN_RING_TYPE_CP2_INDEX] = &cayman_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &cayman_dma_ring,
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = &cayman_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
 	},
 	.irq = {
 		.set = &evergreen_irq_set,
@@ -2071,9 +1739,11 @@
 		.wait_for_vblank = &dce4_wait_for_vblank,
 		.set_backlight_level = &atombios_set_backlight_level,
 		.get_backlight_level = &atombios_get_backlight_level,
+		.hdmi_enable = &evergreen_hdmi_enable,
+		.hdmi_setmode = &evergreen_hdmi_setmode,
 	},
 	.copy = {
-		.blit = &r600_copy_blit,
+		.blit = &r600_copy_cpdma,
 		.blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
 		.dma = &evergreen_copy_dma,
 		.dma_ring_index = R600_RING_TYPE_DMA_INDEX,
@@ -2129,6 +1799,36 @@
 	},
 };
 
+static struct radeon_asic_ring si_gfx_ring = {
+	.ib_execute = &si_ring_ib_execute,
+	.ib_parse = &si_ib_parse,
+	.emit_fence = &si_fence_ring_emit,
+	.emit_semaphore = &r600_semaphore_ring_emit,
+	.cs_parse = NULL,
+	.ring_test = &r600_ring_test,
+	.ib_test = &r600_ib_test,
+	.is_lockup = &si_gfx_is_lockup,
+	.vm_flush = &si_vm_flush,
+	.get_rptr = &radeon_ring_generic_get_rptr,
+	.get_wptr = &radeon_ring_generic_get_wptr,
+	.set_wptr = &radeon_ring_generic_set_wptr,
+};
+
+static struct radeon_asic_ring si_dma_ring = {
+	.ib_execute = &cayman_dma_ring_ib_execute,
+	.ib_parse = &evergreen_dma_ib_parse,
+	.emit_fence = &evergreen_dma_fence_ring_emit,
+	.emit_semaphore = &r600_dma_semaphore_ring_emit,
+	.cs_parse = NULL,
+	.ring_test = &r600_dma_ring_test,
+	.ib_test = &r600_dma_ib_test,
+	.is_lockup = &si_dma_is_lockup,
+	.vm_flush = &si_dma_vm_flush,
+	.get_rptr = &r600_dma_get_rptr,
+	.get_wptr = &r600_dma_get_wptr,
+	.set_wptr = &r600_dma_set_wptr,
+};
+
 static struct radeon_asic si_asic = {
 	.init = &si_init,
 	.fini = &si_fini,
@@ -2152,88 +1852,12 @@
 		.set_page = &si_vm_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &si_ring_ib_execute,
-			.ib_parse = &si_ib_parse,
-			.emit_fence = &si_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &si_gfx_is_lockup,
-			.vm_flush = &si_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP1_INDEX] = {
-			.ib_execute = &si_ring_ib_execute,
-			.ib_parse = &si_ib_parse,
-			.emit_fence = &si_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &si_gfx_is_lockup,
-			.vm_flush = &si_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP2_INDEX] = {
-			.ib_execute = &si_ring_ib_execute,
-			.ib_parse = &si_ib_parse,
-			.emit_fence = &si_fence_ring_emit,
-			.emit_semaphore = &r600_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &r600_ring_test,
-			.ib_test = &r600_ib_test,
-			.is_lockup = &si_gfx_is_lockup,
-			.vm_flush = &si_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &cayman_dma_ring_ib_execute,
-			.ib_parse = &evergreen_dma_ib_parse,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &si_dma_is_lockup,
-			.vm_flush = &si_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
-			.ib_execute = &cayman_dma_ring_ib_execute,
-			.ib_parse = &evergreen_dma_ib_parse,
-			.emit_fence = &evergreen_dma_fence_ring_emit,
-			.emit_semaphore = &r600_dma_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &r600_dma_ring_test,
-			.ib_test = &r600_dma_ib_test,
-			.is_lockup = &si_dma_is_lockup,
-			.vm_flush = &si_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &cayman_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &si_gfx_ring,
+		[CAYMAN_RING_TYPE_CP1_INDEX] = &si_gfx_ring,
+		[CAYMAN_RING_TYPE_CP2_INDEX] = &si_gfx_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &si_dma_ring,
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = &si_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
 	},
 	.irq = {
 		.set = &si_irq_set,
@@ -2245,6 +1869,8 @@
 		.wait_for_vblank = &dce4_wait_for_vblank,
 		.set_backlight_level = &atombios_set_backlight_level,
 		.get_backlight_level = &atombios_get_backlight_level,
+		.hdmi_enable = &evergreen_hdmi_enable,
+		.hdmi_setmode = &evergreen_hdmi_setmode,
 	},
 	.copy = {
 		.blit = NULL,
@@ -2304,6 +1930,51 @@
 	},
 };
 
+static struct radeon_asic_ring ci_gfx_ring = {
+	.ib_execute = &cik_ring_ib_execute,
+	.ib_parse = &cik_ib_parse,
+	.emit_fence = &cik_fence_gfx_ring_emit,
+	.emit_semaphore = &cik_semaphore_ring_emit,
+	.cs_parse = NULL,
+	.ring_test = &cik_ring_test,
+	.ib_test = &cik_ib_test,
+	.is_lockup = &cik_gfx_is_lockup,
+	.vm_flush = &cik_vm_flush,
+	.get_rptr = &radeon_ring_generic_get_rptr,
+	.get_wptr = &radeon_ring_generic_get_wptr,
+	.set_wptr = &radeon_ring_generic_set_wptr,
+};
+
+static struct radeon_asic_ring ci_cp_ring = {
+	.ib_execute = &cik_ring_ib_execute,
+	.ib_parse = &cik_ib_parse,
+	.emit_fence = &cik_fence_compute_ring_emit,
+	.emit_semaphore = &cik_semaphore_ring_emit,
+	.cs_parse = NULL,
+	.ring_test = &cik_ring_test,
+	.ib_test = &cik_ib_test,
+	.is_lockup = &cik_gfx_is_lockup,
+	.vm_flush = &cik_vm_flush,
+	.get_rptr = &cik_compute_ring_get_rptr,
+	.get_wptr = &cik_compute_ring_get_wptr,
+	.set_wptr = &cik_compute_ring_set_wptr,
+};
+
+static struct radeon_asic_ring ci_dma_ring = {
+	.ib_execute = &cik_sdma_ring_ib_execute,
+	.ib_parse = &cik_ib_parse,
+	.emit_fence = &cik_sdma_fence_ring_emit,
+	.emit_semaphore = &cik_sdma_semaphore_ring_emit,
+	.cs_parse = NULL,
+	.ring_test = &cik_sdma_ring_test,
+	.ib_test = &cik_sdma_ib_test,
+	.is_lockup = &cik_sdma_is_lockup,
+	.vm_flush = &cik_dma_vm_flush,
+	.get_rptr = &r600_dma_get_rptr,
+	.get_wptr = &r600_dma_get_wptr,
+	.set_wptr = &r600_dma_set_wptr,
+};
+
 static struct radeon_asic ci_asic = {
 	.init = &cik_init,
 	.fini = &cik_fini,
@@ -2327,88 +1998,12 @@
 		.set_page = &cik_vm_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &cik_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_fence_gfx_ring_emit,
-			.emit_semaphore = &cik_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_ring_test,
-			.ib_test = &cik_ib_test,
-			.is_lockup = &cik_gfx_is_lockup,
-			.vm_flush = &cik_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP1_INDEX] = {
-			.ib_execute = &cik_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_fence_compute_ring_emit,
-			.emit_semaphore = &cik_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_ring_test,
-			.ib_test = &cik_ib_test,
-			.is_lockup = &cik_gfx_is_lockup,
-			.vm_flush = &cik_vm_flush,
-			.get_rptr = &cik_compute_ring_get_rptr,
-			.get_wptr = &cik_compute_ring_get_wptr,
-			.set_wptr = &cik_compute_ring_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP2_INDEX] = {
-			.ib_execute = &cik_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_fence_compute_ring_emit,
-			.emit_semaphore = &cik_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_ring_test,
-			.ib_test = &cik_ib_test,
-			.is_lockup = &cik_gfx_is_lockup,
-			.vm_flush = &cik_vm_flush,
-			.get_rptr = &cik_compute_ring_get_rptr,
-			.get_wptr = &cik_compute_ring_get_wptr,
-			.set_wptr = &cik_compute_ring_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &cik_sdma_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_sdma_fence_ring_emit,
-			.emit_semaphore = &cik_sdma_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_sdma_ring_test,
-			.ib_test = &cik_sdma_ib_test,
-			.is_lockup = &cik_sdma_is_lockup,
-			.vm_flush = &cik_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
-			.ib_execute = &cik_sdma_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_sdma_fence_ring_emit,
-			.emit_semaphore = &cik_sdma_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_sdma_ring_test,
-			.ib_test = &cik_sdma_ib_test,
-			.is_lockup = &cik_sdma_is_lockup,
-			.vm_flush = &cik_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &cayman_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring,
+		[CAYMAN_RING_TYPE_CP1_INDEX] = &ci_cp_ring,
+		[CAYMAN_RING_TYPE_CP2_INDEX] = &ci_cp_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring,
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
 	},
 	.irq = {
 		.set = &cik_irq_set,
@@ -2418,6 +2013,8 @@
 		.bandwidth_update = &dce8_bandwidth_update,
 		.get_vblank_counter = &evergreen_get_vblank_counter,
 		.wait_for_vblank = &dce4_wait_for_vblank,
+		.hdmi_enable = &evergreen_hdmi_enable,
+		.hdmi_setmode = &evergreen_hdmi_setmode,
 	},
 	.copy = {
 		.blit = NULL,
@@ -2451,6 +2048,25 @@
 		.set_pcie_lanes = NULL,
 		.set_clock_gating = NULL,
 		.set_uvd_clocks = &cik_set_uvd_clocks,
+		.get_temperature = &ci_get_temp,
+	},
+	.dpm = {
+		.init = &ci_dpm_init,
+		.setup_asic = &ci_dpm_setup_asic,
+		.enable = &ci_dpm_enable,
+		.disable = &ci_dpm_disable,
+		.pre_set_power_state = &ci_dpm_pre_set_power_state,
+		.set_power_state = &ci_dpm_set_power_state,
+		.post_set_power_state = &ci_dpm_post_set_power_state,
+		.display_configuration_changed = &ci_dpm_display_configuration_changed,
+		.fini = &ci_dpm_fini,
+		.get_sclk = &ci_dpm_get_sclk,
+		.get_mclk = &ci_dpm_get_mclk,
+		.print_power_state = &ci_dpm_print_power_state,
+		.debugfs_print_current_performance_level = &ci_dpm_debugfs_print_current_performance_level,
+		.force_performance_level = &ci_dpm_force_performance_level,
+		.vblank_too_short = &ci_dpm_vblank_too_short,
+		.powergate_uvd = &ci_dpm_powergate_uvd,
 	},
 	.pflip = {
 		.pre_page_flip = &evergreen_pre_page_flip,
@@ -2482,88 +2098,12 @@
 		.set_page = &cik_vm_set_page,
 	},
 	.ring = {
-		[RADEON_RING_TYPE_GFX_INDEX] = {
-			.ib_execute = &cik_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_fence_gfx_ring_emit,
-			.emit_semaphore = &cik_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_ring_test,
-			.ib_test = &cik_ib_test,
-			.is_lockup = &cik_gfx_is_lockup,
-			.vm_flush = &cik_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP1_INDEX] = {
-			.ib_execute = &cik_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_fence_compute_ring_emit,
-			.emit_semaphore = &cik_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_ring_test,
-			.ib_test = &cik_ib_test,
-			.is_lockup = &cik_gfx_is_lockup,
-			.vm_flush = &cik_vm_flush,
-			.get_rptr = &cik_compute_ring_get_rptr,
-			.get_wptr = &cik_compute_ring_get_wptr,
-			.set_wptr = &cik_compute_ring_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_CP2_INDEX] = {
-			.ib_execute = &cik_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_fence_compute_ring_emit,
-			.emit_semaphore = &cik_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_ring_test,
-			.ib_test = &cik_ib_test,
-			.is_lockup = &cik_gfx_is_lockup,
-			.vm_flush = &cik_vm_flush,
-			.get_rptr = &cik_compute_ring_get_rptr,
-			.get_wptr = &cik_compute_ring_get_wptr,
-			.set_wptr = &cik_compute_ring_set_wptr,
-		},
-		[R600_RING_TYPE_DMA_INDEX] = {
-			.ib_execute = &cik_sdma_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_sdma_fence_ring_emit,
-			.emit_semaphore = &cik_sdma_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_sdma_ring_test,
-			.ib_test = &cik_sdma_ib_test,
-			.is_lockup = &cik_sdma_is_lockup,
-			.vm_flush = &cik_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[CAYMAN_RING_TYPE_DMA1_INDEX] = {
-			.ib_execute = &cik_sdma_ring_ib_execute,
-			.ib_parse = &cik_ib_parse,
-			.emit_fence = &cik_sdma_fence_ring_emit,
-			.emit_semaphore = &cik_sdma_semaphore_ring_emit,
-			.cs_parse = NULL,
-			.ring_test = &cik_sdma_ring_test,
-			.ib_test = &cik_sdma_ib_test,
-			.is_lockup = &cik_sdma_is_lockup,
-			.vm_flush = &cik_dma_vm_flush,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		},
-		[R600_RING_TYPE_UVD_INDEX] = {
-			.ib_execute = &r600_uvd_ib_execute,
-			.emit_fence = &r600_uvd_fence_emit,
-			.emit_semaphore = &cayman_uvd_semaphore_emit,
-			.cs_parse = &radeon_uvd_cs_parse,
-			.ring_test = &r600_uvd_ring_test,
-			.ib_test = &r600_uvd_ib_test,
-			.is_lockup = &radeon_ring_test_lockup,
-			.get_rptr = &radeon_ring_generic_get_rptr,
-			.get_wptr = &radeon_ring_generic_get_wptr,
-			.set_wptr = &radeon_ring_generic_set_wptr,
-		}
+		[RADEON_RING_TYPE_GFX_INDEX] = &ci_gfx_ring,
+		[CAYMAN_RING_TYPE_CP1_INDEX] = &ci_cp_ring,
+		[CAYMAN_RING_TYPE_CP2_INDEX] = &ci_cp_ring,
+		[R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring,
+		[CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
 	},
 	.irq = {
 		.set = &cik_irq_set,
@@ -2573,6 +2113,8 @@
 		.bandwidth_update = &dce8_bandwidth_update,
 		.get_vblank_counter = &evergreen_get_vblank_counter,
 		.wait_for_vblank = &dce4_wait_for_vblank,
+		.hdmi_enable = &evergreen_hdmi_enable,
+		.hdmi_setmode = &evergreen_hdmi_setmode,
 	},
 	.copy = {
 		.blit = NULL,
@@ -2606,6 +2148,24 @@
 		.set_pcie_lanes = NULL,
 		.set_clock_gating = NULL,
 		.set_uvd_clocks = &cik_set_uvd_clocks,
+		.get_temperature = &kv_get_temp,
+	},
+	.dpm = {
+		.init = &kv_dpm_init,
+		.setup_asic = &kv_dpm_setup_asic,
+		.enable = &kv_dpm_enable,
+		.disable = &kv_dpm_disable,
+		.pre_set_power_state = &kv_dpm_pre_set_power_state,
+		.set_power_state = &kv_dpm_set_power_state,
+		.post_set_power_state = &kv_dpm_post_set_power_state,
+		.display_configuration_changed = &kv_dpm_display_configuration_changed,
+		.fini = &kv_dpm_fini,
+		.get_sclk = &kv_dpm_get_sclk,
+		.get_mclk = &kv_dpm_get_mclk,
+		.print_power_state = &kv_dpm_print_power_state,
+		.debugfs_print_current_performance_level = &kv_dpm_debugfs_print_current_performance_level,
+		.force_performance_level = &kv_dpm_force_performance_level,
+		.powergate_uvd = &kv_dpm_powergate_uvd,
 	},
 	.pflip = {
 		.pre_page_flip = &evergreen_pre_page_flip,
@@ -2775,19 +2335,188 @@
 			rdev->has_uvd = false;
 		else
 			rdev->has_uvd = true;
+		switch (rdev->family) {
+		case CHIP_TAHITI:
+			rdev->cg_flags =
+				RADEON_CG_SUPPORT_GFX_MGCG |
+				RADEON_CG_SUPPORT_GFX_MGLS |
+				/*RADEON_CG_SUPPORT_GFX_CGCG |*/
+				RADEON_CG_SUPPORT_GFX_CGLS |
+				RADEON_CG_SUPPORT_GFX_CGTS |
+				RADEON_CG_SUPPORT_GFX_CP_LS |
+				RADEON_CG_SUPPORT_MC_MGCG |
+				RADEON_CG_SUPPORT_SDMA_MGCG |
+				RADEON_CG_SUPPORT_BIF_LS |
+				RADEON_CG_SUPPORT_VCE_MGCG |
+				RADEON_CG_SUPPORT_UVD_MGCG |
+				RADEON_CG_SUPPORT_HDP_LS |
+				RADEON_CG_SUPPORT_HDP_MGCG;
+			rdev->pg_flags = 0;
+			break;
+		case CHIP_PITCAIRN:
+			rdev->cg_flags =
+				RADEON_CG_SUPPORT_GFX_MGCG |
+				RADEON_CG_SUPPORT_GFX_MGLS |
+				/*RADEON_CG_SUPPORT_GFX_CGCG |*/
+				RADEON_CG_SUPPORT_GFX_CGLS |
+				RADEON_CG_SUPPORT_GFX_CGTS |
+				RADEON_CG_SUPPORT_GFX_CP_LS |
+				RADEON_CG_SUPPORT_GFX_RLC_LS |
+				RADEON_CG_SUPPORT_MC_LS |
+				RADEON_CG_SUPPORT_MC_MGCG |
+				RADEON_CG_SUPPORT_SDMA_MGCG |
+				RADEON_CG_SUPPORT_BIF_LS |
+				RADEON_CG_SUPPORT_VCE_MGCG |
+				RADEON_CG_SUPPORT_UVD_MGCG |
+				RADEON_CG_SUPPORT_HDP_LS |
+				RADEON_CG_SUPPORT_HDP_MGCG;
+			rdev->pg_flags = 0;
+			break;
+		case CHIP_VERDE:
+			rdev->cg_flags =
+				RADEON_CG_SUPPORT_GFX_MGCG |
+				RADEON_CG_SUPPORT_GFX_MGLS |
+				/*RADEON_CG_SUPPORT_GFX_CGCG |*/
+				RADEON_CG_SUPPORT_GFX_CGLS |
+				RADEON_CG_SUPPORT_GFX_CGTS |
+				RADEON_CG_SUPPORT_GFX_CP_LS |
+				RADEON_CG_SUPPORT_GFX_RLC_LS |
+				RADEON_CG_SUPPORT_MC_LS |
+				RADEON_CG_SUPPORT_MC_MGCG |
+				RADEON_CG_SUPPORT_SDMA_MGCG |
+				RADEON_CG_SUPPORT_BIF_LS |
+				RADEON_CG_SUPPORT_VCE_MGCG |
+				RADEON_CG_SUPPORT_UVD_MGCG |
+				RADEON_CG_SUPPORT_HDP_LS |
+				RADEON_CG_SUPPORT_HDP_MGCG;
+			rdev->pg_flags = 0 |
+				/*RADEON_PG_SUPPORT_GFX_CG | */
+				RADEON_PG_SUPPORT_SDMA;
+			break;
+		case CHIP_OLAND:
+			rdev->cg_flags =
+				RADEON_CG_SUPPORT_GFX_MGCG |
+				RADEON_CG_SUPPORT_GFX_MGLS |
+				/*RADEON_CG_SUPPORT_GFX_CGCG |*/
+				RADEON_CG_SUPPORT_GFX_CGLS |
+				RADEON_CG_SUPPORT_GFX_CGTS |
+				RADEON_CG_SUPPORT_GFX_CP_LS |
+				RADEON_CG_SUPPORT_GFX_RLC_LS |
+				RADEON_CG_SUPPORT_MC_LS |
+				RADEON_CG_SUPPORT_MC_MGCG |
+				RADEON_CG_SUPPORT_SDMA_MGCG |
+				RADEON_CG_SUPPORT_BIF_LS |
+				RADEON_CG_SUPPORT_UVD_MGCG |
+				RADEON_CG_SUPPORT_HDP_LS |
+				RADEON_CG_SUPPORT_HDP_MGCG;
+			rdev->pg_flags = 0;
+			break;
+		case CHIP_HAINAN:
+			rdev->cg_flags =
+				RADEON_CG_SUPPORT_GFX_MGCG |
+				RADEON_CG_SUPPORT_GFX_MGLS |
+				/*RADEON_CG_SUPPORT_GFX_CGCG |*/
+				RADEON_CG_SUPPORT_GFX_CGLS |
+				RADEON_CG_SUPPORT_GFX_CGTS |
+				RADEON_CG_SUPPORT_GFX_CP_LS |
+				RADEON_CG_SUPPORT_GFX_RLC_LS |
+				RADEON_CG_SUPPORT_MC_LS |
+				RADEON_CG_SUPPORT_MC_MGCG |
+				RADEON_CG_SUPPORT_SDMA_MGCG |
+				RADEON_CG_SUPPORT_BIF_LS |
+				RADEON_CG_SUPPORT_HDP_LS |
+				RADEON_CG_SUPPORT_HDP_MGCG;
+			rdev->pg_flags = 0;
+			break;
+		default:
+			rdev->cg_flags = 0;
+			rdev->pg_flags = 0;
+			break;
+		}
 		break;
 	case CHIP_BONAIRE:
 		rdev->asic = &ci_asic;
 		rdev->num_crtc = 6;
+		rdev->has_uvd = true;
+		rdev->cg_flags =
+			RADEON_CG_SUPPORT_GFX_MGCG |
+			RADEON_CG_SUPPORT_GFX_MGLS |
+			/*RADEON_CG_SUPPORT_GFX_CGCG |*/
+			RADEON_CG_SUPPORT_GFX_CGLS |
+			RADEON_CG_SUPPORT_GFX_CGTS |
+			RADEON_CG_SUPPORT_GFX_CGTS_LS |
+			RADEON_CG_SUPPORT_GFX_CP_LS |
+			RADEON_CG_SUPPORT_MC_LS |
+			RADEON_CG_SUPPORT_MC_MGCG |
+			RADEON_CG_SUPPORT_SDMA_MGCG |
+			RADEON_CG_SUPPORT_SDMA_LS |
+			RADEON_CG_SUPPORT_BIF_LS |
+			RADEON_CG_SUPPORT_VCE_MGCG |
+			RADEON_CG_SUPPORT_UVD_MGCG |
+			RADEON_CG_SUPPORT_HDP_LS |
+			RADEON_CG_SUPPORT_HDP_MGCG;
+		rdev->pg_flags = 0;
 		break;
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
 		rdev->asic = &kv_asic;
 		/* set num crtcs */
-		if (rdev->family == CHIP_KAVERI)
+		if (rdev->family == CHIP_KAVERI) {
 			rdev->num_crtc = 4;
-		else
+			rdev->cg_flags =
+				RADEON_CG_SUPPORT_GFX_MGCG |
+				RADEON_CG_SUPPORT_GFX_MGLS |
+				/*RADEON_CG_SUPPORT_GFX_CGCG |*/
+				RADEON_CG_SUPPORT_GFX_CGLS |
+				RADEON_CG_SUPPORT_GFX_CGTS |
+				RADEON_CG_SUPPORT_GFX_CGTS_LS |
+				RADEON_CG_SUPPORT_GFX_CP_LS |
+				RADEON_CG_SUPPORT_SDMA_MGCG |
+				RADEON_CG_SUPPORT_SDMA_LS |
+				RADEON_CG_SUPPORT_BIF_LS |
+				RADEON_CG_SUPPORT_VCE_MGCG |
+				RADEON_CG_SUPPORT_UVD_MGCG |
+				RADEON_CG_SUPPORT_HDP_LS |
+				RADEON_CG_SUPPORT_HDP_MGCG;
+			rdev->pg_flags = 0;
+				/*RADEON_PG_SUPPORT_GFX_CG |
+				RADEON_PG_SUPPORT_GFX_SMG |
+				RADEON_PG_SUPPORT_GFX_DMG |
+				RADEON_PG_SUPPORT_UVD |
+				RADEON_PG_SUPPORT_VCE |
+				RADEON_PG_SUPPORT_CP |
+				RADEON_PG_SUPPORT_GDS |
+				RADEON_PG_SUPPORT_RLC_SMU_HS |
+				RADEON_PG_SUPPORT_ACP |
+				RADEON_PG_SUPPORT_SAMU;*/
+		} else {
 			rdev->num_crtc = 2;
+			rdev->cg_flags =
+				RADEON_CG_SUPPORT_GFX_MGCG |
+				RADEON_CG_SUPPORT_GFX_MGLS |
+				/*RADEON_CG_SUPPORT_GFX_CGCG |*/
+				RADEON_CG_SUPPORT_GFX_CGLS |
+				RADEON_CG_SUPPORT_GFX_CGTS |
+				RADEON_CG_SUPPORT_GFX_CGTS_LS |
+				RADEON_CG_SUPPORT_GFX_CP_LS |
+				RADEON_CG_SUPPORT_SDMA_MGCG |
+				RADEON_CG_SUPPORT_SDMA_LS |
+				RADEON_CG_SUPPORT_BIF_LS |
+				RADEON_CG_SUPPORT_VCE_MGCG |
+				RADEON_CG_SUPPORT_UVD_MGCG |
+				RADEON_CG_SUPPORT_HDP_LS |
+				RADEON_CG_SUPPORT_HDP_MGCG;
+			rdev->pg_flags = 0;
+				/*RADEON_PG_SUPPORT_GFX_CG |
+				RADEON_PG_SUPPORT_GFX_SMG |
+				RADEON_PG_SUPPORT_UVD |
+				RADEON_PG_SUPPORT_VCE |
+				RADEON_PG_SUPPORT_CP |
+				RADEON_PG_SUPPORT_GDS |
+				RADEON_PG_SUPPORT_RLC_SMU_HS |
+				RADEON_PG_SUPPORT_SAMU;*/
+		}
+		rdev->has_uvd = true;
 		break;
 	default:
 		/* FIXME: not supported yet */
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index ca18957..818bbe6 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -336,10 +336,6 @@
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
-int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
-int r600_copy_blit(struct radeon_device *rdev,
-		   uint64_t src_offset, uint64_t dst_offset,
-		   unsigned num_gpu_pages, struct radeon_fence **fence);
 int r600_copy_cpdma(struct radeon_device *rdev,
 		    uint64_t src_offset, uint64_t dst_offset,
 		    unsigned num_gpu_pages, struct radeon_fence **fence);
@@ -371,8 +367,6 @@
 int r600_mc_wait_for_idle(struct radeon_device *rdev);
 int r600_pcie_gart_init(struct radeon_device *rdev);
 void r600_scratch_init(struct radeon_device *rdev);
-int r600_blit_init(struct radeon_device *rdev);
-void r600_blit_fini(struct radeon_device *rdev);
 int r600_init_microcode(struct radeon_device *rdev);
 /* r600 irq */
 int r600_irq_process(struct radeon_device *rdev);
@@ -385,28 +379,25 @@
 void r600_rlc_stop(struct radeon_device *rdev);
 /* r600 audio */
 int r600_audio_init(struct radeon_device *rdev);
-struct r600_audio r600_audio_status(struct radeon_device *rdev);
+struct r600_audio_pin r600_audio_status(struct radeon_device *rdev);
 void r600_audio_fini(struct radeon_device *rdev);
 int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder);
 void r600_hdmi_update_audio_settings(struct drm_encoder *encoder);
 void r600_hdmi_enable(struct drm_encoder *encoder, bool enable);
 void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode);
-/* r600 blit */
-int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages,
-			   struct radeon_fence **fence, struct radeon_sa_bo **vb,
-			   struct radeon_semaphore **sem);
-void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence,
-			 struct radeon_sa_bo *vb, struct radeon_semaphore *sem);
-void r600_kms_blit_copy(struct radeon_device *rdev,
-			u64 src_gpu_addr, u64 dst_gpu_addr,
-			unsigned num_gpu_pages,
-			struct radeon_sa_bo *vb);
 int r600_mc_wait_for_idle(struct radeon_device *rdev);
 u32 r600_get_xclk(struct radeon_device *rdev);
 uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev);
 int rv6xx_get_temp(struct radeon_device *rdev);
 int r600_dpm_pre_set_power_state(struct radeon_device *rdev);
 void r600_dpm_post_set_power_state(struct radeon_device *rdev);
+/* r600 dma */
+uint32_t r600_dma_get_rptr(struct radeon_device *rdev,
+			   struct radeon_ring *ring);
+uint32_t r600_dma_get_wptr(struct radeon_device *rdev,
+			   struct radeon_ring *ring);
+void r600_dma_set_wptr(struct radeon_device *rdev,
+		       struct radeon_ring *ring);
 /* rv6xx dpm */
 int rv6xx_dpm_init(struct radeon_device *rdev);
 int rv6xx_dpm_enable(struct radeon_device *rdev);
@@ -421,6 +412,8 @@
 				 struct radeon_ps *ps);
 void rv6xx_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
 						       struct seq_file *m);
+int rv6xx_dpm_force_performance_level(struct radeon_device *rdev,
+				      enum radeon_dpm_forced_level level);
 /* rs780 dpm */
 int rs780_dpm_init(struct radeon_device *rdev);
 int rs780_dpm_enable(struct radeon_device *rdev);
@@ -436,19 +429,6 @@
 void rs780_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
 						       struct seq_file *m);
 
-/* uvd */
-int r600_uvd_init(struct radeon_device *rdev);
-int r600_uvd_rbc_start(struct radeon_device *rdev);
-void r600_uvd_rbc_stop(struct radeon_device *rdev);
-int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
-void r600_uvd_fence_emit(struct radeon_device *rdev,
-			 struct radeon_fence *fence);
-void r600_uvd_semaphore_emit(struct radeon_device *rdev,
-			     struct radeon_ring *ring,
-			     struct radeon_semaphore *semaphore,
-			     bool emit_wait);
-void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-
 /*
  * rv770,rv730,rv710,rv740
  */
@@ -466,7 +446,6 @@
 		  unsigned num_gpu_pages,
 		   struct radeon_fence **fence);
 u32 rv770_get_xclk(struct radeon_device *rdev);
-int rv770_uvd_resume(struct radeon_device *rdev);
 int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 int rv770_get_temp(struct radeon_device *rdev);
 /* rv7xx pm */
@@ -528,7 +507,6 @@
 extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc);
 extern void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc);
 void evergreen_disable_interrupt_state(struct radeon_device *rdev);
-int evergreen_blit_init(struct radeon_device *rdev);
 int evergreen_mc_wait_for_idle(struct radeon_device *rdev);
 void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
 				   struct radeon_fence *fence);
@@ -650,6 +628,8 @@
 
 /* DCE6 - SI */
 void dce6_bandwidth_update(struct radeon_device *rdev);
+int dce6_audio_init(struct radeon_device *rdev);
+void dce6_audio_fini(struct radeon_device *rdev);
 
 /*
  * si
@@ -710,7 +690,6 @@
 uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
-int cik_uvd_resume(struct radeon_device *rdev);
 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
 			      struct radeon_fence *fence);
 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
@@ -761,5 +740,81 @@
 			      struct radeon_ring *ring);
 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
 			       struct radeon_ring *ring);
+int ci_get_temp(struct radeon_device *rdev);
+int kv_get_temp(struct radeon_device *rdev);
+
+int ci_dpm_init(struct radeon_device *rdev);
+int ci_dpm_enable(struct radeon_device *rdev);
+void ci_dpm_disable(struct radeon_device *rdev);
+int ci_dpm_pre_set_power_state(struct radeon_device *rdev);
+int ci_dpm_set_power_state(struct radeon_device *rdev);
+void ci_dpm_post_set_power_state(struct radeon_device *rdev);
+void ci_dpm_setup_asic(struct radeon_device *rdev);
+void ci_dpm_display_configuration_changed(struct radeon_device *rdev);
+void ci_dpm_fini(struct radeon_device *rdev);
+u32 ci_dpm_get_sclk(struct radeon_device *rdev, bool low);
+u32 ci_dpm_get_mclk(struct radeon_device *rdev, bool low);
+void ci_dpm_print_power_state(struct radeon_device *rdev,
+			      struct radeon_ps *ps);
+void ci_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
+						    struct seq_file *m);
+int ci_dpm_force_performance_level(struct radeon_device *rdev,
+				   enum radeon_dpm_forced_level level);
+bool ci_dpm_vblank_too_short(struct radeon_device *rdev);
+void ci_dpm_powergate_uvd(struct radeon_device *rdev, bool gate);
+
+int kv_dpm_init(struct radeon_device *rdev);
+int kv_dpm_enable(struct radeon_device *rdev);
+void kv_dpm_disable(struct radeon_device *rdev);
+int kv_dpm_pre_set_power_state(struct radeon_device *rdev);
+int kv_dpm_set_power_state(struct radeon_device *rdev);
+void kv_dpm_post_set_power_state(struct radeon_device *rdev);
+void kv_dpm_setup_asic(struct radeon_device *rdev);
+void kv_dpm_display_configuration_changed(struct radeon_device *rdev);
+void kv_dpm_fini(struct radeon_device *rdev);
+u32 kv_dpm_get_sclk(struct radeon_device *rdev, bool low);
+u32 kv_dpm_get_mclk(struct radeon_device *rdev, bool low);
+void kv_dpm_print_power_state(struct radeon_device *rdev,
+			      struct radeon_ps *ps);
+void kv_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
+						    struct seq_file *m);
+int kv_dpm_force_performance_level(struct radeon_device *rdev,
+				   enum radeon_dpm_forced_level level);
+void kv_dpm_powergate_uvd(struct radeon_device *rdev, bool gate);
+
+/* uvd v1.0 */
+uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev,
+                           struct radeon_ring *ring);
+uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
+                           struct radeon_ring *ring);
+void uvd_v1_0_set_wptr(struct radeon_device *rdev,
+                       struct radeon_ring *ring);
+
+int uvd_v1_0_init(struct radeon_device *rdev);
+void uvd_v1_0_fini(struct radeon_device *rdev);
+int uvd_v1_0_start(struct radeon_device *rdev);
+void uvd_v1_0_stop(struct radeon_device *rdev);
+
+int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
+int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
+			     struct radeon_ring *ring,
+			     struct radeon_semaphore *semaphore,
+			     bool emit_wait);
+void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+
+/* uvd v2.2 */
+int uvd_v2_2_resume(struct radeon_device *rdev);
+void uvd_v2_2_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence);
+
+/* uvd v3.1 */
+void uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
+			     struct radeon_ring *ring,
+			     struct radeon_semaphore *semaphore,
+			     bool emit_wait);
+
+/* uvd v4.2 */
+int uvd_v4_2_resume(struct radeon_device *rdev);
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index e3f3e88..404e25d 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -163,8 +163,8 @@
 		num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
 			sizeof(ATOM_GPIO_I2C_ASSIGMENT);
 
+		gpio = &i2c_info->asGPIO_Info[0];
 		for (i = 0; i < num_indices; i++) {
-			gpio = &i2c_info->asGPIO_Info[i];
 
 			radeon_lookup_i2c_gpio_quirks(rdev, gpio, i);
 
@@ -172,6 +172,8 @@
 				i2c = radeon_get_bus_rec_for_i2c_gpio(gpio);
 				break;
 			}
+			gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+				((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
 		}
 	}
 
@@ -195,9 +197,8 @@
 		num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
 			sizeof(ATOM_GPIO_I2C_ASSIGMENT);
 
+		gpio = &i2c_info->asGPIO_Info[0];
 		for (i = 0; i < num_indices; i++) {
-			gpio = &i2c_info->asGPIO_Info[i];
-
 			radeon_lookup_i2c_gpio_quirks(rdev, gpio, i);
 
 			i2c = radeon_get_bus_rec_for_i2c_gpio(gpio);
@@ -206,12 +207,14 @@
 				sprintf(stmp, "0x%x", i2c.i2c_id);
 				rdev->i2c_bus[i] = radeon_i2c_create(rdev->ddev, &i2c, stmp);
 			}
+			gpio = (ATOM_GPIO_I2C_ASSIGMENT *)
+				((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT));
 		}
 	}
 }
 
 static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
-							u8 id)
+						 u8 id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
 	struct radeon_gpio_rec gpio;
@@ -230,8 +233,8 @@
 		num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) /
 			sizeof(ATOM_GPIO_PIN_ASSIGNMENT);
 
+		pin = gpio_info->asGPIO_Pin;
 		for (i = 0; i < num_indices; i++) {
-			pin = &gpio_info->asGPIO_Pin[i];
 			if (id == pin->ucGPIO_ID) {
 				gpio.id = pin->ucGPIO_ID;
 				gpio.reg = le16_to_cpu(pin->usGpioPin_AIndex) * 4;
@@ -239,6 +242,8 @@
 				gpio.valid = true;
 				break;
 			}
+			pin = (ATOM_GPIO_PIN_ASSIGNMENT *)
+				((u8 *)pin + sizeof(ATOM_GPIO_PIN_ASSIGNMENT));
 		}
 	}
 
@@ -711,13 +716,16 @@
 								(ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT *)
 								(ctx->bios + data_offset +
 								 le16_to_cpu(router_obj->asObjects[k].usSrcDstTableOffset));
+							u8 *num_dst_objs = (u8 *)
+								((u8 *)router_src_dst_table + 1 +
+								 (router_src_dst_table->ucNumberOfSrc * 2));
+							u16 *dst_objs = (u16 *)(num_dst_objs + 1);
 							int enum_id;
 
 							router.router_id = router_obj_id;
-							for (enum_id = 0; enum_id < router_src_dst_table->ucNumberOfDst;
-							     enum_id++) {
+							for (enum_id = 0; enum_id < (*num_dst_objs); enum_id++) {
 								if (le16_to_cpu(path->usConnObjectId) ==
-								    le16_to_cpu(router_src_dst_table->usDstObjectID[enum_id]))
+								    le16_to_cpu(dst_objs[enum_id]))
 									break;
 							}
 
@@ -1480,6 +1488,15 @@
 	uint8_t frev, crev;
 	int i, num_indices;
 
+	if (id == ASIC_INTERNAL_MEMORY_SS) {
+		if (!(rdev->mode_info.firmware_flags & ATOM_BIOS_INFO_MEMORY_CLOCK_SS_SUPPORT))
+			return false;
+	}
+	if (id == ASIC_INTERNAL_ENGINE_SS) {
+		if (!(rdev->mode_info.firmware_flags & ATOM_BIOS_INFO_ENGINE_CLOCK_SS_SUPPORT))
+			return false;
+	}
+
 	memset(ss, 0, sizeof(struct radeon_atom_ss));
 	if (atom_parse_data_header(mode_info->atom_context, index, &size,
 				   &frev, &crev, &data_offset)) {
@@ -1672,7 +1689,9 @@
 								kfree(edid);
 						}
 					}
-					record += sizeof(ATOM_FAKE_EDID_PATCH_RECORD);
+					record += fake_edid_record->ucFakeEDIDLength ?
+						fake_edid_record->ucFakeEDIDLength + 2 :
+						sizeof(ATOM_FAKE_EDID_PATCH_RECORD);
 					break;
 				case LCD_PANEL_RESOLUTION_RECORD_TYPE:
 					panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
@@ -2237,6 +2256,11 @@
 				 (controller->ucFanParameters &
 				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
 			rdev->pm.int_thermal_type = THERMAL_TYPE_CI;
+		} else if (controller->ucType == ATOM_PP_THERMALCONTROLLER_KAVERI) {
+			DRM_INFO("Internal thermal controller %s fan control\n",
+				 (controller->ucFanParameters &
+				  ATOM_PP_FANPARAMETERS_NOFAN) ? "without" : "with");
+			rdev->pm.int_thermal_type = THERMAL_TYPE_KV;
 		} else if ((controller->ucType ==
 			    ATOM_PP_THERMALCONTROLLER_EXTERNAL_GPIO) ||
 			   (controller->ucType ==
@@ -2782,7 +2806,7 @@
 							     ATOM_PLL_CNTL_FLAG_PLL_POST_DIV_EN) ? true : false;
 				dividers->enable_dithen = (args.v3.ucCntlFlag &
 							   ATOM_PLL_CNTL_FLAG_FRACTION_DISABLE) ? false : true;
-				dividers->fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
+				dividers->whole_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDiv);
 				dividers->frac_fb_div = le16_to_cpu(args.v3.ulFbDiv.usFbDivFrac);
 				dividers->ref_div = args.v3.ucRefDiv;
 				dividers->vco_mode = (args.v3.ucCntlFlag &
@@ -3077,6 +3101,121 @@
 	return radeon_atom_get_max_vddc(rdev, VOLTAGE_TYPE_VDDC, leakage_idx, voltage);
 }
 
+int radeon_atom_get_leakage_id_from_vbios(struct radeon_device *rdev,
+					  u16 *leakage_id)
+{
+	union set_voltage args;
+	int index = GetIndexIntoMasterTable(COMMAND, SetVoltage);
+	u8 frev, crev;
+
+	if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev))
+		return -EINVAL;
+
+	switch (crev) {
+	case 3:
+	case 4:
+		args.v3.ucVoltageType = 0;
+		args.v3.ucVoltageMode = ATOM_GET_LEAKAGE_ID;
+		args.v3.usVoltageLevel = 0;
+
+		atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+		*leakage_id = le16_to_cpu(args.v3.usVoltageLevel);
+		break;
+	default:
+		DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int radeon_atom_get_leakage_vddc_based_on_leakage_params(struct radeon_device *rdev,
+							 u16 *vddc, u16 *vddci,
+							 u16 virtual_voltage_id,
+							 u16 vbios_voltage_id)
+{
+	int index = GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo);
+	u8 frev, crev;
+	u16 data_offset, size;
+	int i, j;
+	ATOM_ASIC_PROFILING_INFO_V2_1 *profile;
+	u16 *leakage_bin, *vddc_id_buf, *vddc_buf, *vddci_id_buf, *vddci_buf;
+
+	*vddc = 0;
+	*vddci = 0;
+
+	if (!atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
+				    &frev, &crev, &data_offset))
+		return -EINVAL;
+
+	profile = (ATOM_ASIC_PROFILING_INFO_V2_1 *)
+		(rdev->mode_info.atom_context->bios + data_offset);
+
+	switch (frev) {
+	case 1:
+		return -EINVAL;
+	case 2:
+		switch (crev) {
+		case 1:
+			if (size < sizeof(ATOM_ASIC_PROFILING_INFO_V2_1))
+				return -EINVAL;
+			leakage_bin = (u16 *)
+				(rdev->mode_info.atom_context->bios + data_offset +
+				 le16_to_cpu(profile->usLeakageBinArrayOffset));
+			vddc_id_buf = (u16 *)
+				(rdev->mode_info.atom_context->bios + data_offset +
+				 le16_to_cpu(profile->usElbVDDC_IdArrayOffset));
+			vddc_buf = (u16 *)
+				(rdev->mode_info.atom_context->bios + data_offset +
+				 le16_to_cpu(profile->usElbVDDC_LevelArrayOffset));
+			vddci_id_buf = (u16 *)
+				(rdev->mode_info.atom_context->bios + data_offset +
+				 le16_to_cpu(profile->usElbVDDCI_IdArrayOffset));
+			vddci_buf = (u16 *)
+				(rdev->mode_info.atom_context->bios + data_offset +
+				 le16_to_cpu(profile->usElbVDDCI_LevelArrayOffset));
+
+			if (profile->ucElbVDDC_Num > 0) {
+				for (i = 0; i < profile->ucElbVDDC_Num; i++) {
+					if (vddc_id_buf[i] == virtual_voltage_id) {
+						for (j = 0; j < profile->ucLeakageBinNum; j++) {
+							if (vbios_voltage_id <= leakage_bin[j]) {
+								*vddc = vddc_buf[j * profile->ucElbVDDC_Num + i];
+								break;
+							}
+						}
+						break;
+					}
+				}
+			}
+			if (profile->ucElbVDDCI_Num > 0) {
+				for (i = 0; i < profile->ucElbVDDCI_Num; i++) {
+					if (vddci_id_buf[i] == virtual_voltage_id) {
+						for (j = 0; j < profile->ucLeakageBinNum; j++) {
+							if (vbios_voltage_id <= leakage_bin[j]) {
+								*vddci = vddci_buf[j * profile->ucElbVDDCI_Num + i];
+								break;
+							}
+						}
+						break;
+					}
+				}
+			}
+			break;
+		default:
+			DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+			return -EINVAL;
+		}
+		break;
+	default:
+		DRM_ERROR("Unknown table version %d, %d\n", frev, crev);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int radeon_atom_get_voltage_gpio_settings(struct radeon_device *rdev,
 					  u16 voltage_level, u8 voltage_type,
 					  u32 *gpio_value, u32 *gpio_mask)
@@ -3279,10 +3418,11 @@
 				ATOM_VOLTAGE_FORMULA_V2 *formula =
 					&voltage_object->v2.asFormula;
 				if (formula->ucNumOfVoltageEntries) {
+					VOLTAGE_LUT_ENTRY *lut = (VOLTAGE_LUT_ENTRY *)
+						((u8 *)&formula->asVIDAdjustEntries[0] +
+						 (sizeof(VOLTAGE_LUT_ENTRY) * (formula->ucNumOfVoltageEntries - 1)));
 					*max_voltage =
-						le16_to_cpu(formula->asVIDAdjustEntries[
-								    formula->ucNumOfVoltageEntries - 1
-								    ].usVoltageValue);
+						le16_to_cpu(lut->usVoltageValue);
 					return 0;
 				}
 			}
@@ -3442,11 +3582,13 @@
 				if (voltage_object) {
 					ATOM_VOLTAGE_FORMULA_V2 *formula =
 						&voltage_object->v2.asFormula;
+					VOLTAGE_LUT_ENTRY *lut;
 					if (formula->ucNumOfVoltageEntries > MAX_VOLTAGE_ENTRIES)
 						return -EINVAL;
+					lut = &formula->asVIDAdjustEntries[0];
 					for (i = 0; i < formula->ucNumOfVoltageEntries; i++) {
 						voltage_table->entries[i].value =
-							le16_to_cpu(formula->asVIDAdjustEntries[i].usVoltageValue);
+							le16_to_cpu(lut->usVoltageValue);
 						ret = radeon_atom_get_voltage_gpio_settings(rdev,
 											    voltage_table->entries[i].value,
 											    voltage_type,
@@ -3454,6 +3596,8 @@
 											    &voltage_table->mask_low);
 						if (ret)
 							return ret;
+						lut = (VOLTAGE_LUT_ENTRY *)
+							((u8 *)lut + sizeof(VOLTAGE_LUT_ENTRY));
 					}
 					voltage_table->count = formula->ucNumOfVoltageEntries;
 					return 0;
@@ -3473,13 +3617,17 @@
 				if (voltage_object) {
 					ATOM_GPIO_VOLTAGE_OBJECT_V3 *gpio =
 						&voltage_object->v3.asGpioVoltageObj;
+					VOLTAGE_LUT_ENTRY_V2 *lut;
 					if (gpio->ucGpioEntryNum > MAX_VOLTAGE_ENTRIES)
 						return -EINVAL;
+					lut = &gpio->asVolGpioLut[0];
 					for (i = 0; i < gpio->ucGpioEntryNum; i++) {
 						voltage_table->entries[i].value =
-							le16_to_cpu(gpio->asVolGpioLut[i].usVoltageValue);
+							le16_to_cpu(lut->usVoltageValue);
 						voltage_table->entries[i].smio_low =
-							le32_to_cpu(gpio->asVolGpioLut[i].ulVoltageId);
+							le32_to_cpu(lut->ulVoltageId);
+						lut = (VOLTAGE_LUT_ENTRY_V2 *)
+							((u8 *)lut + sizeof(VOLTAGE_LUT_ENTRY_V2));
 					}
 					voltage_table->mask_low = le32_to_cpu(gpio->ulGpioMaskVal);
 					voltage_table->count = gpio->ucGpioEntryNum;
@@ -3605,7 +3753,6 @@
 	union vram_info *vram_info;
 	u32 mem_timing_size = gddr5 ?
 		sizeof(ATOM_MEMORY_TIMING_FORMAT_V2) : sizeof(ATOM_MEMORY_TIMING_FORMAT);
-	u8 *p;
 
 	memset(mclk_range_table, 0, sizeof(struct atom_memory_clock_range_table));
 
@@ -3624,6 +3771,7 @@
 				if (module_index < vram_info->v1_4.ucNumOfVRAMModule) {
 					ATOM_VRAM_MODULE_V4 *vram_module =
 						(ATOM_VRAM_MODULE_V4 *)vram_info->v1_4.aVramInfo;
+					ATOM_MEMORY_TIMING_FORMAT *format;
 
 					for (i = 0; i < module_index; i++) {
 						if (le16_to_cpu(vram_module->usModuleSize) == 0)
@@ -3634,11 +3782,11 @@
 					mclk_range_table->num_entries = (u8)
 						((le16_to_cpu(vram_module->usModuleSize) - offsetof(ATOM_VRAM_MODULE_V4, asMemTiming)) /
 						 mem_timing_size);
-					p = (u8 *)&vram_module->asMemTiming[0];
+					format = &vram_module->asMemTiming[0];
 					for (i = 0; i < mclk_range_table->num_entries; i++) {
-						ATOM_MEMORY_TIMING_FORMAT *format = (ATOM_MEMORY_TIMING_FORMAT *)p;
 						mclk_range_table->mclk[i] = le32_to_cpu(format->ulClkRange);
-						p += mem_timing_size;
+						format = (ATOM_MEMORY_TIMING_FORMAT *)
+							((u8 *)format + mem_timing_size);
 					}
 				} else
 					return -EINVAL;
diff --git a/drivers/gpu/drm/radeon/radeon_blit_common.h b/drivers/gpu/drm/radeon/radeon_blit_common.h
deleted file mode 100644
index 4ecbe72..0000000
--- a/drivers/gpu/drm/radeon/radeon_blit_common.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Copyright 2009 Advanced Micro Devices, Inc.
- * Copyright 2009 Red Hat Inc.
- * Copyright 2012 Alcatel-Lucent, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __RADEON_BLIT_COMMON_H__
-
-#define DI_PT_RECTLIST        0x11
-#define DI_INDEX_SIZE_16_BIT  0x0
-#define DI_SRC_SEL_AUTO_INDEX 0x2
-
-#define FMT_8                 0x1
-#define FMT_5_6_5             0x8
-#define FMT_8_8_8_8           0x1a
-#define COLOR_8               0x1
-#define COLOR_5_6_5           0x8
-#define COLOR_8_8_8_8         0x1a
-
-#define RECT_UNIT_H           32
-#define RECT_UNIT_W           (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H)
-
-#define __RADEON_BLIT_COMMON_H__
-#endif
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 78edadc..68ce360 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -147,7 +147,7 @@
 					 enum radeon_combios_table_offset table)
 {
 	struct radeon_device *rdev = dev->dev_private;
-	int rev;
+	int rev, size;
 	uint16_t offset = 0, check_offset;
 
 	if (!rdev->bios)
@@ -156,174 +156,106 @@
 	switch (table) {
 		/* absolute offset tables */
 	case COMBIOS_ASIC_INIT_1_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0xc);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0xc;
 		break;
 	case COMBIOS_BIOS_SUPPORT_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x14);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x14;
 		break;
 	case COMBIOS_DAC_PROGRAMMING_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x2a);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x2a;
 		break;
 	case COMBIOS_MAX_COLOR_DEPTH_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x2c);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x2c;
 		break;
 	case COMBIOS_CRTC_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x2e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x2e;
 		break;
 	case COMBIOS_PLL_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x30);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x30;
 		break;
 	case COMBIOS_TV_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x32);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x32;
 		break;
 	case COMBIOS_DFP_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x34);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x34;
 		break;
 	case COMBIOS_HW_CONFIG_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x36);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x36;
 		break;
 	case COMBIOS_MULTIMEDIA_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x38);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x38;
 		break;
 	case COMBIOS_TV_STD_PATCH_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x3e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x3e;
 		break;
 	case COMBIOS_LCD_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x40);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x40;
 		break;
 	case COMBIOS_MOBILE_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x42);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x42;
 		break;
 	case COMBIOS_PLL_INIT_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x46);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x46;
 		break;
 	case COMBIOS_MEM_CONFIG_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x48);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x48;
 		break;
 	case COMBIOS_SAVE_MASK_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x4a);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x4a;
 		break;
 	case COMBIOS_HARDCODED_EDID_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x4c);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x4c;
 		break;
 	case COMBIOS_ASIC_INIT_2_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x4e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x4e;
 		break;
 	case COMBIOS_CONNECTOR_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x50);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x50;
 		break;
 	case COMBIOS_DYN_CLK_1_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x52);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x52;
 		break;
 	case COMBIOS_RESERVED_MEM_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x54);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x54;
 		break;
 	case COMBIOS_EXT_TMDS_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x58);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x58;
 		break;
 	case COMBIOS_MEM_CLK_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x5a);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x5a;
 		break;
 	case COMBIOS_EXT_DAC_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x5c);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x5c;
 		break;
 	case COMBIOS_MISC_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x5e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x5e;
 		break;
 	case COMBIOS_CRT_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x60);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x60;
 		break;
 	case COMBIOS_INTEGRATED_SYSTEM_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x62);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x62;
 		break;
 	case COMBIOS_COMPONENT_VIDEO_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x64);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x64;
 		break;
 	case COMBIOS_FAN_SPEED_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x66);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x66;
 		break;
 	case COMBIOS_OVERDRIVE_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x68);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x68;
 		break;
 	case COMBIOS_OEM_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x6a);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x6a;
 		break;
 	case COMBIOS_DYN_CLK_2_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x6c);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x6c;
 		break;
 	case COMBIOS_POWER_CONNECTOR_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x6e);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x6e;
 		break;
 	case COMBIOS_I2C_INFO_TABLE:
-		check_offset = RBIOS16(rdev->bios_header_start + 0x70);
-		if (check_offset)
-			offset = check_offset;
+		check_offset = 0x70;
 		break;
 		/* relative offset tables */
 	case COMBIOS_ASIC_INIT_3_TABLE:	/* offset from misc info */
@@ -439,11 +371,16 @@
 		}
 		break;
 	default:
+		check_offset = 0;
 		break;
 	}
 
-	return offset;
+	size = RBIOS8(rdev->bios_header_start + 0x6);
+	/* check absolute offset tables */
+	if (table < COMBIOS_ASIC_INIT_3_TABLE && check_offset && check_offset < size)
+		offset = RBIOS16(rdev->bios_header_start + check_offset);
 
+	return offset;
 }
 
 bool radeon_combios_check_hardcoded_edid(struct radeon_device *rdev)
@@ -965,16 +902,22 @@
 			dac = RBIOS8(dac_info + 0x3) & 0xf;
 			p_dac->ps2_pdac_adj = (bg << 8) | (dac);
 		}
-		/* if the values are all zeros, use the table */
-		if (p_dac->ps2_pdac_adj)
+		/* if the values are zeros, use the table */
+		if ((dac == 0) || (bg == 0))
+			found = 0;
+		else
 			found = 1;
 	}
 
 	/* quirks */
-	/* Radeon 9100 (R200) */
-	if ((dev->pdev->device == 0x514D) &&
+	/* Radeon 7000 (RV100) */
+	if (((dev->pdev->device == 0x5159) &&
 	    (dev->pdev->subsystem_vendor == 0x174B) &&
-	    (dev->pdev->subsystem_device == 0x7149)) {
+	    (dev->pdev->subsystem_device == 0x7c28)) ||
+	/* Radeon 9100 (R200) */
+	   ((dev->pdev->device == 0x514D) &&
+	    (dev->pdev->subsystem_vendor == 0x174B) &&
+	    (dev->pdev->subsystem_device == 0x7149))) {
 		/* vbios value is bad, use the default */
 		found = 0;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index efc4f64..3cae2bb 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -1444,13 +1444,13 @@
 	dev_priv->ring.end = ((u32 *) dev_priv->cp_ring->handle
 			      + init->ring_size / sizeof(u32));
 	dev_priv->ring.size = init->ring_size;
-	dev_priv->ring.size_l2qw = drm_order(init->ring_size / 8);
+	dev_priv->ring.size_l2qw = order_base_2(init->ring_size / 8);
 
 	dev_priv->ring.rptr_update = /* init->rptr_update */ 4096;
-	dev_priv->ring.rptr_update_l2qw = drm_order( /* init->rptr_update */ 4096 / 8);
+	dev_priv->ring.rptr_update_l2qw = order_base_2( /* init->rptr_update */ 4096 / 8);
 
 	dev_priv->ring.fetch_size = /* init->fetch_size */ 32;
-	dev_priv->ring.fetch_size_l2ow = drm_order( /* init->fetch_size */ 32 / 16);
+	dev_priv->ring.fetch_size_l2ow = order_base_2( /* init->fetch_size */ 32 / 16);
 	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
 
 	dev_priv->ring.high_mark = RADEON_RING_HIGH_MARK;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 13a130f..a560844 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -268,7 +268,7 @@
 			return -EINVAL;
 
 		/* we only support VM on some SI+ rings */
-		if ((p->rdev->asic->ring[p->ring].cs_parse == NULL) &&
+		if ((p->rdev->asic->ring[p->ring]->cs_parse == NULL) &&
 		   ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
 			DRM_ERROR("Ring %d requires VM!\n", p->ring);
 			return -EINVAL;
@@ -383,6 +383,10 @@
 		DRM_ERROR("Invalid command stream !\n");
 		return r;
 	}
+
+	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
+		radeon_uvd_note_usage(rdev);
+
 	radeon_cs_sync_rings(parser);
 	r = radeon_ib_schedule(rdev, &parser->ib, NULL);
 	if (r) {
@@ -474,6 +478,9 @@
 		return r;
 	}
 
+	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
+		radeon_uvd_note_usage(rdev);
+
 	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&vm->mutex);
 	r = radeon_vm_alloc_pt(rdev, vm);
@@ -552,10 +559,6 @@
 		return r;
 	}
 
-	/* XXX pick SD/HD/MVC */
-	if (parser.ring == R600_RING_TYPE_UVD_INDEX)
-		radeon_uvd_note_usage(rdev);
-
 	r = radeon_cs_ib_chunk(rdev, &parser);
 	if (r) {
 		goto out;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 82335e3..16cb879 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1003,16 +1003,28 @@
 		radeon_vram_limit = 0;
 	}
 
+	if (radeon_gart_size == -1) {
+		/* default to a larger gart size on newer asics */
+		if (rdev->family >= CHIP_RV770)
+			radeon_gart_size = 1024;
+		else
+			radeon_gart_size = 512;
+	}
 	/* gtt size must be power of two and greater or equal to 32M */
 	if (radeon_gart_size < 32) {
-		dev_warn(rdev->dev, "gart size (%d) too small forcing to 512M\n",
+		dev_warn(rdev->dev, "gart size (%d) too small\n",
 				radeon_gart_size);
-		radeon_gart_size = 512;
-
+		if (rdev->family >= CHIP_RV770)
+			radeon_gart_size = 1024;
+		else
+			radeon_gart_size = 512;
 	} else if (!radeon_check_pot_argument(radeon_gart_size)) {
 		dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n",
 				radeon_gart_size);
-		radeon_gart_size = 512;
+		if (rdev->family >= CHIP_RV770)
+			radeon_gart_size = 1024;
+		else
+			radeon_gart_size = 512;
 	}
 	rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20;
 
@@ -1144,7 +1156,7 @@
 	rdev->family = flags & RADEON_FAMILY_MASK;
 	rdev->is_atom_bios = false;
 	rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT;
-	rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024;
+	rdev->mc.gtt_size = 512 * 1024 * 1024;
 	rdev->accel_working = false;
 	/* set up ring ids */
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
@@ -1163,6 +1175,7 @@
 	mutex_init(&rdev->gem.mutex);
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->gpu_clock_mutex);
+	mutex_init(&rdev->srbm_mutex);
 	init_rwsem(&rdev->pm.mclk_lock);
 	init_rwsem(&rdev->exclusive_lock);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
@@ -1269,7 +1282,7 @@
 	/* this will fail for cards that aren't VGA class devices, just
 	 * ignore it */
 	vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);
-	vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops);
+	vga_switcheroo_register_client(rdev->pdev, &radeon_switcheroo_ops, false);
 
 	r = radeon_init(rdev);
 	if (r)
@@ -1519,6 +1532,7 @@
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
+	radeon_pm_suspend(rdev);
 	radeon_suspend(rdev);
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -1564,6 +1578,7 @@
 		}
 	}
 
+	radeon_pm_resume(rdev);
 	drm_helper_resume_force_mode(rdev->ddev);
 
 	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index c2b67b4..b055bdd 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -345,7 +345,8 @@
 
 static int radeon_crtc_page_flip(struct drm_crtc *crtc,
 				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event)
+				 struct drm_pending_vblank_event *event,
+				 uint32_t page_flip_flags)
 {
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
@@ -1254,41 +1255,41 @@
 	for (i = 0; i < RADEON_MAX_AFMT_BLOCKS; i++)
 		rdev->mode_info.afmt[i] = NULL;
 
-	if (ASIC_IS_DCE6(rdev)) {
-		/* todo */
+	if (ASIC_IS_NODCE(rdev)) {
+		/* nothing to do */
 	} else if (ASIC_IS_DCE4(rdev)) {
+		static uint32_t eg_offsets[] = {
+			EVERGREEN_CRTC0_REGISTER_OFFSET,
+			EVERGREEN_CRTC1_REGISTER_OFFSET,
+			EVERGREEN_CRTC2_REGISTER_OFFSET,
+			EVERGREEN_CRTC3_REGISTER_OFFSET,
+			EVERGREEN_CRTC4_REGISTER_OFFSET,
+			EVERGREEN_CRTC5_REGISTER_OFFSET,
+			0x13830 - 0x7030,
+		};
+		int num_afmt;
+
+		/* DCE8 has 7 audio blocks tied to DIG encoders */
+		/* DCE6 has 6 audio blocks tied to DIG encoders */
 		/* DCE4/5 has 6 audio blocks tied to DIG encoders */
 		/* DCE4.1 has 2 audio blocks tied to DIG encoders */
-		rdev->mode_info.afmt[0] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-		if (rdev->mode_info.afmt[0]) {
-			rdev->mode_info.afmt[0]->offset = EVERGREEN_CRTC0_REGISTER_OFFSET;
-			rdev->mode_info.afmt[0]->id = 0;
-		}
-		rdev->mode_info.afmt[1] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-		if (rdev->mode_info.afmt[1]) {
-			rdev->mode_info.afmt[1]->offset = EVERGREEN_CRTC1_REGISTER_OFFSET;
-			rdev->mode_info.afmt[1]->id = 1;
-		}
-		if (!ASIC_IS_DCE41(rdev)) {
-			rdev->mode_info.afmt[2] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-			if (rdev->mode_info.afmt[2]) {
-				rdev->mode_info.afmt[2]->offset = EVERGREEN_CRTC2_REGISTER_OFFSET;
-				rdev->mode_info.afmt[2]->id = 2;
-			}
-			rdev->mode_info.afmt[3] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-			if (rdev->mode_info.afmt[3]) {
-				rdev->mode_info.afmt[3]->offset = EVERGREEN_CRTC3_REGISTER_OFFSET;
-				rdev->mode_info.afmt[3]->id = 3;
-			}
-			rdev->mode_info.afmt[4] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-			if (rdev->mode_info.afmt[4]) {
-				rdev->mode_info.afmt[4]->offset = EVERGREEN_CRTC4_REGISTER_OFFSET;
-				rdev->mode_info.afmt[4]->id = 4;
-			}
-			rdev->mode_info.afmt[5] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
-			if (rdev->mode_info.afmt[5]) {
-				rdev->mode_info.afmt[5]->offset = EVERGREEN_CRTC5_REGISTER_OFFSET;
-				rdev->mode_info.afmt[5]->id = 5;
+		if (ASIC_IS_DCE8(rdev))
+			num_afmt = 7;
+		else if (ASIC_IS_DCE6(rdev))
+			num_afmt = 6;
+		else if (ASIC_IS_DCE5(rdev))
+			num_afmt = 6;
+		else if (ASIC_IS_DCE41(rdev))
+			num_afmt = 2;
+		else /* DCE4 */
+			num_afmt = 6;
+
+		BUG_ON(num_afmt > ARRAY_SIZE(eg_offsets));
+		for (i = 0; i < num_afmt; i++) {
+			rdev->mode_info.afmt[i] = kzalloc(sizeof(struct radeon_afmt), GFP_KERNEL);
+			if (rdev->mode_info.afmt[i]) {
+				rdev->mode_info.afmt[i]->offset = eg_offsets[i];
+				rdev->mode_info.afmt[i]->id = i;
 			}
 		}
 	} else if (ASIC_IS_DCE3(rdev)) {
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 29876b1..cb4445f 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -81,7 +81,6 @@
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
-int radeon_driver_firstopen_kms(struct drm_device *dev);
 void radeon_driver_lastclose_kms(struct drm_device *dev);
 int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv);
 void radeon_driver_postclose_kms(struct drm_device *dev,
@@ -101,8 +100,6 @@
 int radeon_driver_irq_postinstall_kms(struct drm_device *dev);
 void radeon_driver_irq_uninstall_kms(struct drm_device *dev);
 irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS);
-int radeon_dma_ioctl_kms(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv);
 int radeon_gem_object_init(struct drm_gem_object *obj);
 void radeon_gem_object_free(struct drm_gem_object *obj);
 int radeon_gem_object_open(struct drm_gem_object *obj,
@@ -111,7 +108,7 @@
 				struct drm_file *file_priv);
 extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc,
 				      int *vpos, int *hpos);
-extern struct drm_ioctl_desc radeon_ioctls_kms[];
+extern const struct drm_ioctl_desc radeon_ioctls_kms[];
 extern int radeon_max_kms_ioctl;
 int radeon_mmap(struct file *filp, struct vm_area_struct *vma);
 int radeon_mode_dumb_mmap(struct drm_file *filp,
@@ -120,9 +117,6 @@
 int radeon_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
 			    struct drm_mode_create_dumb *args);
-int radeon_mode_dumb_destroy(struct drm_file *file_priv,
-			     struct drm_device *dev,
-			     uint32_t handle);
 struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj);
 struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev,
 							size_t size,
@@ -154,7 +148,7 @@
 int radeon_r4xx_atom = 0;
 int radeon_agpmode = 0;
 int radeon_vram_limit = 0;
-int radeon_gart_size = 512; /* default gart size */
+int radeon_gart_size = -1; /* auto */
 int radeon_benchmarking = 0;
 int radeon_testing = 0;
 int radeon_connector_table = 0;
@@ -187,7 +181,7 @@
 MODULE_PARM_DESC(agpmode, "AGP Mode (-1 == PCI)");
 module_param_named(agpmode, radeon_agpmode, int, 0444);
 
-MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc)");
+MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)");
 module_param_named(gartsize, radeon_gart_size, int, 0600);
 
 MODULE_PARM_DESC(benchmark, "Run benchmark");
@@ -272,7 +266,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 	.read = drm_read,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = radeon_compat_ioctl,
@@ -282,7 +275,7 @@
 
 static struct drm_driver driver_old = {
 	.driver_features =
-	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG |
+	    DRIVER_USE_AGP | DRIVER_PCI_DMA | DRIVER_SG |
 	    DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED,
 	.dev_priv_size = sizeof(drm_radeon_buf_priv_t),
 	.load = radeon_driver_load,
@@ -381,7 +374,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = radeon_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 	.read = drm_read,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = radeon_kms_compat_ioctl,
@@ -390,12 +382,11 @@
 
 static struct drm_driver kms_driver = {
 	.driver_features =
-	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG |
-	    DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED | DRIVER_GEM |
-	    DRIVER_PRIME,
+	    DRIVER_USE_AGP |
+	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
+	    DRIVER_PRIME | DRIVER_RENDER,
 	.dev_priv_size = 0,
 	.load = radeon_driver_load_kms,
-	.firstopen = radeon_driver_firstopen_kms,
 	.open = radeon_driver_open_kms,
 	.preclose = radeon_driver_preclose_kms,
 	.postclose = radeon_driver_postclose_kms,
@@ -421,10 +412,9 @@
 	.gem_free_object = radeon_gem_object_free,
 	.gem_open_object = radeon_gem_object_open,
 	.gem_close_object = radeon_gem_object_close,
-	.dma_ioctl = radeon_dma_ioctl_kms,
 	.dumb_create = radeon_mode_dumb_create,
 	.dumb_map_offset = radeon_mode_dumb_mmap,
-	.dumb_destroy = radeon_mode_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 	.fops = &radeon_driver_kms_fops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 7ddb0ef..ddb8f8e 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -782,7 +782,7 @@
 
 		} else {
 			/* put fence directly behind firmware */
-			index = ALIGN(rdev->uvd.fw_size, 8);
+			index = ALIGN(rdev->uvd_fw->size, 8);
 			rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
 			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index d9d31a3..b990b1a 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -207,7 +207,6 @@
 	if (rdev->gart.robj == NULL) {
 		return;
 	}
-	radeon_gart_table_vram_unpin(rdev);
 	radeon_bo_unref(&rdev->gart.robj);
 }
 
@@ -466,7 +465,7 @@
 		size += rdev->vm_manager.max_pfn * 8;
 		size *= 2;
 		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
-					      RADEON_VM_PTB_ALIGN(size),
+					      RADEON_GPU_PAGE_ALIGN(size),
 					      RADEON_VM_PTB_ALIGN_SIZE,
 					      RADEON_GEM_DOMAIN_VRAM);
 		if (r) {
@@ -621,7 +620,7 @@
 	}
 
 retry:
-	pd_size = RADEON_VM_PTB_ALIGN(radeon_vm_directory_size(rdev));
+	pd_size = radeon_vm_directory_size(rdev);
 	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
 			     &vm->page_directory, pd_size,
 			     RADEON_VM_PTB_ALIGN_SIZE, false);
@@ -953,8 +952,8 @@
 retry:
 		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
 				     &vm->page_tables[pt_idx],
-				     RADEON_VM_PTB_ALIGN(RADEON_VM_PTE_COUNT * 8),
-				     RADEON_VM_PTB_ALIGN_SIZE, false);
+				     RADEON_VM_PTE_COUNT * 8,
+				     RADEON_GPU_PAGE_SIZE, false);
 
 		if (r == -ENOMEM) {
 			r = radeon_vm_evict(rdev, vm);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index aa79603..dce99c8 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -570,13 +570,6 @@
 	return 0;
 }
 
-int radeon_mode_dumb_destroy(struct drm_file *file_priv,
-			     struct drm_device *dev,
-			     uint32_t handle)
-{
-	return drm_gem_handle_delete(file_priv, handle);
-}
-
 #if defined(CONFIG_DEBUG_FS)
 static int radeon_debugfs_gem_info(struct seq_file *m, void *data)
 {
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 081886b..cc9e848 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -275,17 +275,19 @@
 			dev_info(rdev->dev, "radeon: using MSI.\n");
 		}
 	}
-	rdev->irq.installed = true;
-	r = drm_irq_install(rdev->ddev);
-	if (r) {
-		rdev->irq.installed = false;
-		return r;
-	}
 
 	INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func);
 	INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
 	INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func);
 
+	rdev->irq.installed = true;
+	r = drm_irq_install(rdev->ddev);
+	if (r) {
+		rdev->irq.installed = false;
+		flush_work(&rdev->hotplug_work);
+		return r;
+	}
+
 	DRM_INFO("radeon: irq initialized.\n");
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 49ff3d1..61580dd 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -433,6 +433,9 @@
 			return -EINVAL;
 		}
 		break;
+	case RADEON_INFO_SI_CP_DMA_COMPUTE:
+		*value = 1;
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
@@ -449,19 +452,6 @@
  * Outdated mess for old drm with Xorg being in charge (void function now).
  */
 /**
- * radeon_driver_firstopen_kms - drm callback for first open
- *
- * @dev: drm dev pointer
- *
- * Nothing to be done for KMS (all asics).
- * Returns 0 on success.
- */
-int radeon_driver_firstopen_kms(struct drm_device *dev)
-{
-	return 0;
-}
-
-/**
  * radeon_driver_firstopen_kms - drm callback for last close
  *
  * @dev: drm dev pointer
@@ -683,16 +673,6 @@
 						     drmcrtc);
 }
 
-/*
- * IOCTL.
- */
-int radeon_dma_ioctl_kms(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv)
-{
-	/* Not valid in KMS. */
-	return -EINVAL;
-}
-
 #define KMS_INVALID_IOCTL(name)						\
 int name(struct drm_device *dev, void *data, struct drm_file *file_priv)\
 {									\
@@ -732,7 +712,7 @@
 KMS_INVALID_IOCTL(radeon_surface_free_kms)
 
 
-struct drm_ioctl_desc radeon_ioctls_kms[] = {
+const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_CP_INIT, radeon_cp_init_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(RADEON_CP_START, radeon_cp_start_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(RADEON_CP_STOP, radeon_cp_stop_kms, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
@@ -761,18 +741,18 @@
 	DRM_IOCTL_DEF_DRV(RADEON_SURF_ALLOC, radeon_surface_alloc_kms, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(RADEON_SURF_FREE, radeon_surface_free_kms, DRM_AUTH),
 	/* KMS */
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_INFO, radeon_gem_info_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_CREATE, radeon_gem_create_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_MMAP, radeon_gem_mmap_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_DOMAIN, radeon_gem_set_domain_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_PREAD, radeon_gem_pread_ioctl, DRM_AUTH|DRM_UNLOCKED),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_PWRITE, radeon_gem_pwrite_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_WAIT_IDLE, radeon_gem_wait_idle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_CS, radeon_cs_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_INFO, radeon_info_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 8296632..d908d8d 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -225,6 +225,7 @@
 	int offset;
 	bool last_buffer_filled_status;
 	int id;
+	struct r600_audio_pin *pin;
 };
 
 struct radeon_mode_info {
@@ -233,7 +234,7 @@
 	enum radeon_connector_table connector_table;
 	bool mode_config_initialized;
 	struct radeon_crtc *crtcs[6];
-	struct radeon_afmt *afmt[6];
+	struct radeon_afmt *afmt[7];
 	/* DVI-I properties */
 	struct drm_property *coherent_mode_property;
 	/* DAC enable load detect */
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 2020bf4..c0fa4aa9 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -142,7 +142,6 @@
 		return r;
 	}
 	bo->rdev = rdev;
-	bo->gem_base.driver_private = NULL;
 	bo->surface_reg = -1;
 	INIT_LIST_HEAD(&bo->list);
 	INIT_LIST_HEAD(&bo->va);
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 49c82c4..209b111 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -113,13 +113,10 @@
  * @bo:	radeon object for which we query the offset
  *
  * Returns mmap offset of the object.
- *
- * Note: addr_space_offset is constant after ttm bo init thus isn't protected
- * by any lock.
  */
 static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
 {
-	return bo->tbo.addr_space_offset;
+	return drm_vma_node_offset_addr(&bo->tbo.vma_node);
 }
 
 extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index f374c46..d755536 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -569,6 +569,8 @@
 	case THERMAL_TYPE_NI:
 	case THERMAL_TYPE_SUMO:
 	case THERMAL_TYPE_SI:
+	case THERMAL_TYPE_CI:
+	case THERMAL_TYPE_KV:
 		if (rdev->asic->pm.get_temperature == NULL)
 			return err;
 		rdev->pm.int_hwmon_dev = hwmon_device_register(rdev->dev);
@@ -624,7 +626,15 @@
 			/* switch back the user state */
 			dpm_state = rdev->pm.dpm.user_state;
 	}
-	radeon_dpm_enable_power_state(rdev, dpm_state);
+	mutex_lock(&rdev->pm.mutex);
+	if (dpm_state == POWER_STATE_TYPE_INTERNAL_THERMAL)
+		rdev->pm.dpm.thermal_active = true;
+	else
+		rdev->pm.dpm.thermal_active = false;
+	rdev->pm.dpm.state = dpm_state;
+	mutex_unlock(&rdev->pm.mutex);
+
+	radeon_pm_compute_clocks(rdev);
 }
 
 static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev,
@@ -687,7 +697,10 @@
 			break;
 		/* internal states */
 		case POWER_STATE_TYPE_INTERNAL_UVD:
-			return rdev->pm.dpm.uvd_ps;
+			if (rdev->pm.dpm.uvd_ps)
+				return rdev->pm.dpm.uvd_ps;
+			else
+				break;
 		case POWER_STATE_TYPE_INTERNAL_UVD_SD:
 			if (ps->class & ATOM_PPLIB_CLASSIFICATION_SDSTATE)
 				return ps;
@@ -729,10 +742,17 @@
 	/* use a fallback state if we didn't match */
 	switch (dpm_state) {
 	case POWER_STATE_TYPE_INTERNAL_UVD_SD:
+		dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD;
+		goto restart_search;
 	case POWER_STATE_TYPE_INTERNAL_UVD_HD:
 	case POWER_STATE_TYPE_INTERNAL_UVD_HD2:
 	case POWER_STATE_TYPE_INTERNAL_UVD_MVC:
-		return rdev->pm.dpm.uvd_ps;
+		if (rdev->pm.dpm.uvd_ps) {
+			return rdev->pm.dpm.uvd_ps;
+		} else {
+			dpm_state = POWER_STATE_TYPE_PERFORMANCE;
+			goto restart_search;
+		}
 	case POWER_STATE_TYPE_INTERNAL_THERMAL:
 		dpm_state = POWER_STATE_TYPE_INTERNAL_ACPI;
 		goto restart_search;
@@ -850,38 +870,51 @@
 
 	radeon_dpm_post_set_power_state(rdev);
 
+	/* force low perf level for thermal */
+	if (rdev->pm.dpm.thermal_active &&
+	    rdev->asic->dpm.force_performance_level) {
+		radeon_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_LOW);
+	}
+
 done:
 	mutex_unlock(&rdev->ring_lock);
 	up_write(&rdev->pm.mclk_lock);
 	mutex_unlock(&rdev->ddev->struct_mutex);
 }
 
-void radeon_dpm_enable_power_state(struct radeon_device *rdev,
-				   enum radeon_pm_state_type dpm_state)
+void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
 {
-	if (!rdev->pm.dpm_enabled)
-		return;
+	enum radeon_pm_state_type dpm_state;
 
-	mutex_lock(&rdev->pm.mutex);
-	switch (dpm_state) {
-	case POWER_STATE_TYPE_INTERNAL_THERMAL:
-		rdev->pm.dpm.thermal_active = true;
-		break;
-	case POWER_STATE_TYPE_INTERNAL_UVD:
-	case POWER_STATE_TYPE_INTERNAL_UVD_SD:
-	case POWER_STATE_TYPE_INTERNAL_UVD_HD:
-	case POWER_STATE_TYPE_INTERNAL_UVD_HD2:
-	case POWER_STATE_TYPE_INTERNAL_UVD_MVC:
-		rdev->pm.dpm.uvd_active = true;
-		break;
-	default:
-		rdev->pm.dpm.thermal_active = false;
-		rdev->pm.dpm.uvd_active = false;
-		break;
+	if (rdev->asic->dpm.powergate_uvd) {
+		mutex_lock(&rdev->pm.mutex);
+		/* enable/disable UVD */
+		radeon_dpm_powergate_uvd(rdev, !enable);
+		mutex_unlock(&rdev->pm.mutex);
+	} else {
+		if (enable) {
+			mutex_lock(&rdev->pm.mutex);
+			rdev->pm.dpm.uvd_active = true;
+			if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0))
+				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD;
+			else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0))
+				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD;
+			else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 1))
+				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD;
+			else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2))
+				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2;
+			else
+				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD;
+			rdev->pm.dpm.state = dpm_state;
+			mutex_unlock(&rdev->pm.mutex);
+		} else {
+			mutex_lock(&rdev->pm.mutex);
+			rdev->pm.dpm.uvd_active = false;
+			mutex_unlock(&rdev->pm.mutex);
+		}
+
+		radeon_pm_compute_clocks(rdev);
 	}
-	rdev->pm.dpm.state = dpm_state;
-	mutex_unlock(&rdev->pm.mutex);
-	radeon_pm_compute_clocks(rdev);
 }
 
 static void radeon_pm_suspend_old(struct radeon_device *rdev)
@@ -1176,7 +1209,17 @@
 	case CHIP_VERDE:
 	case CHIP_OLAND:
 	case CHIP_HAINAN:
-		if (radeon_dpm == 1)
+	case CHIP_BONAIRE:
+	case CHIP_KABINI:
+	case CHIP_KAVERI:
+		/* DPM requires the RLC, RV770+ dGPU requires SMC */
+		if (!rdev->rlc_fw)
+			rdev->pm.pm_method = PM_METHOD_PROFILE;
+		else if ((rdev->family >= CHIP_RV770) &&
+			 (!(rdev->flags & RADEON_IS_IGP)) &&
+			 (!rdev->smc_fw))
+			rdev->pm.pm_method = PM_METHOD_PROFILE;
+		else if (radeon_dpm == 1)
 			rdev->pm.pm_method = PM_METHOD_DPM;
 		else
 			rdev->pm.pm_method = PM_METHOD_PROFILE;
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index 65b9eab..2007456 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -68,7 +68,6 @@
 			       RADEON_GEM_DOMAIN_GTT, sg, &bo);
 	if (ret)
 		return ERR_PTR(ret);
-	bo->gem_base.driver_private = bo;
 
 	mutex_lock(&rdev->gem.mutex);
 	list_add_tail(&bo->list, &rdev->gem.objects);
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index fb5ea62..46a25f0 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -363,11 +363,10 @@
 {
 	u32 rptr;
 
-	if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX])
+	if (rdev->wb.enabled)
 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
 	else
 		rptr = RREG32(ring->rptr_reg);
-	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
 
 	return rptr;
 }
@@ -378,7 +377,6 @@
 	u32 wptr;
 
 	wptr = RREG32(ring->wptr_reg);
-	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
 
 	return wptr;
 }
@@ -386,7 +384,7 @@
 void radeon_ring_generic_set_wptr(struct radeon_device *rdev,
 				  struct radeon_ring *ring)
 {
-	WREG32(ring->wptr_reg, (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask);
+	WREG32(ring->wptr_reg, ring->wptr);
 	(void)RREG32(ring->wptr_reg);
 }
 
@@ -719,16 +717,13 @@
  * @rptr_offs: offset of the rptr writeback location in the WB buffer
  * @rptr_reg: MMIO offset of the rptr register
  * @wptr_reg: MMIO offset of the wptr register
- * @ptr_reg_shift: bit offset of the rptr/wptr values
- * @ptr_reg_mask: bit mask of the rptr/wptr values
  * @nop: nop packet for this ring
  *
  * Initialize the driver information for the selected ring (all asics).
  * Returns 0 on success, error on failure.
  */
 int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size,
-		     unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg,
-		     u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop)
+		     unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 nop)
 {
 	int r;
 
@@ -736,8 +731,6 @@
 	ring->rptr_offs = rptr_offs;
 	ring->rptr_reg = rptr_reg;
 	ring->wptr_reg = wptr_reg;
-	ring->ptr_reg_shift = ptr_reg_shift;
-	ring->ptr_reg_mask = ptr_reg_mask;
 	ring->nop = nop;
 	/* Allocate ring buffer */
 	if (ring->ring_obj == NULL) {
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 6c0ce89..71245d6 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -203,7 +203,9 @@
 
 static int radeon_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 {
-	return 0;
+	struct radeon_bo *rbo = container_of(bo, struct radeon_bo, tbo);
+
+	return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
 }
 
 static void radeon_move_null(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_ucode.h b/drivers/gpu/drm/radeon/radeon_ucode.h
index d8b05f7..3385836 100644
--- a/drivers/gpu/drm/radeon/radeon_ucode.h
+++ b/drivers/gpu/drm/radeon/radeon_ucode.h
@@ -35,6 +35,12 @@
 #define SI_PFP_UCODE_SIZE            2144
 #define SI_PM4_UCODE_SIZE            2144
 #define SI_CE_UCODE_SIZE             2144
+#define CIK_PFP_UCODE_SIZE           2144
+#define CIK_ME_UCODE_SIZE            2144
+#define CIK_CE_UCODE_SIZE            2144
+
+/* MEC */
+#define CIK_MEC_UCODE_SIZE           4192
 
 /* RLC */
 #define R600_RLC_UCODE_SIZE          768
@@ -43,12 +49,20 @@
 #define CAYMAN_RLC_UCODE_SIZE        1024
 #define ARUBA_RLC_UCODE_SIZE         1536
 #define SI_RLC_UCODE_SIZE            2048
+#define BONAIRE_RLC_UCODE_SIZE       2048
+#define KB_RLC_UCODE_SIZE            2560
+#define KV_RLC_UCODE_SIZE            2560
 
 /* MC */
 #define BTC_MC_UCODE_SIZE            6024
 #define CAYMAN_MC_UCODE_SIZE         6037
 #define SI_MC_UCODE_SIZE             7769
 #define OLAND_MC_UCODE_SIZE          7863
+#define CIK_MC_UCODE_SIZE            7866
+
+/* SDMA */
+#define CIK_SDMA_UCODE_SIZE          1050
+#define CIK_SDMA_UCODE_VERSION       64
 
 /* SMC */
 #define RV770_SMC_UCODE_START        0x0100
@@ -126,4 +140,7 @@
 #define HAINAN_SMC_UCODE_START       0x10000
 #define HAINAN_SMC_UCODE_SIZE        0xe67C
 
+#define BONAIRE_SMC_UCODE_START      0x20000
+#define BONAIRE_SMC_UCODE_SIZE       0x1FDEC
+
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 414fd14..1a01bbf 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -56,7 +56,6 @@
 
 int radeon_uvd_init(struct radeon_device *rdev)
 {
-	const struct firmware *fw;
 	unsigned long bo_size;
 	const char *fw_name;
 	int i, r;
@@ -105,14 +104,14 @@
 		return -EINVAL;
 	}
 
-	r = request_firmware(&fw, fw_name, rdev->dev);
+	r = request_firmware(&rdev->uvd_fw, fw_name, rdev->dev);
 	if (r) {
 		dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
 			fw_name);
 		return r;
 	}
 
-	bo_size = RADEON_GPU_PAGE_ALIGN(fw->size + 8) +
+	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
 		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
 	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
 			     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo);
@@ -145,15 +144,10 @@
 
 	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
 
-	rdev->uvd.fw_size = fw->size;
-	memset(rdev->uvd.cpu_addr, 0, bo_size);
-	memcpy(rdev->uvd.cpu_addr, fw->data, fw->size);
-
-	release_firmware(fw);
-
 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
 		atomic_set(&rdev->uvd.handles[i], 0);
 		rdev->uvd.filp[i] = NULL;
+		rdev->uvd.img_size[i] = 0;
 	}
 
 	return 0;
@@ -174,33 +168,60 @@
 	}
 
 	radeon_bo_unref(&rdev->uvd.vcpu_bo);
+
+	release_firmware(rdev->uvd_fw);
 }
 
 int radeon_uvd_suspend(struct radeon_device *rdev)
 {
 	unsigned size;
+	void *ptr;
+	int i;
 
 	if (rdev->uvd.vcpu_bo == NULL)
 		return 0;
 
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
+		if (atomic_read(&rdev->uvd.handles[i]))
+			break;
+
+	if (i == RADEON_MAX_UVD_HANDLES)
+		return 0;
+
 	size = radeon_bo_size(rdev->uvd.vcpu_bo);
+	size -= rdev->uvd_fw->size;
+
+	ptr = rdev->uvd.cpu_addr;
+	ptr += rdev->uvd_fw->size;
+
 	rdev->uvd.saved_bo = kmalloc(size, GFP_KERNEL);
-	memcpy(rdev->uvd.saved_bo, rdev->uvd.cpu_addr, size);
+	memcpy(rdev->uvd.saved_bo, ptr, size);
 
 	return 0;
 }
 
 int radeon_uvd_resume(struct radeon_device *rdev)
 {
+	unsigned size;
+	void *ptr;
+
 	if (rdev->uvd.vcpu_bo == NULL)
 		return -EINVAL;
 
+	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
+
+	size = radeon_bo_size(rdev->uvd.vcpu_bo);
+	size -= rdev->uvd_fw->size;
+
+	ptr = rdev->uvd.cpu_addr;
+	ptr += rdev->uvd_fw->size;
+
 	if (rdev->uvd.saved_bo != NULL) {
-		unsigned size = radeon_bo_size(rdev->uvd.vcpu_bo);
-		memcpy(rdev->uvd.cpu_addr, rdev->uvd.saved_bo, size);
+		memcpy(ptr, rdev->uvd.saved_bo, size);
 		kfree(rdev->uvd.saved_bo);
 		rdev->uvd.saved_bo = NULL;
-	}
+	} else
+		memset(ptr, 0, size);
 
 	return 0;
 }
@@ -215,8 +236,8 @@
 {
 	int i, r;
 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
-		if (rdev->uvd.filp[i] == filp) {
-			uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
+		uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
+		if (handle != 0 && rdev->uvd.filp[i] == filp) {
 			struct radeon_fence *fence;
 
 			r = radeon_uvd_get_destroy_msg(rdev,
@@ -327,6 +348,7 @@
 			     unsigned offset, unsigned buf_sizes[])
 {
 	int32_t *msg, msg_type, handle;
+	unsigned img_size = 0;
 	void *ptr;
 
 	int i, r;
@@ -336,9 +358,19 @@
 		return -EINVAL;
 	}
 
+	if (bo->tbo.sync_obj) {
+		r = radeon_fence_wait(bo->tbo.sync_obj, false);
+		if (r) {
+			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
+			return r;
+		}
+	}
+
 	r = radeon_bo_kmap(bo, &ptr);
-	if (r)
+	if (r) {
+		DRM_ERROR("Failed mapping the UVD message (%d)!\n", r);
 		return r;
+	}
 
 	msg = ptr + offset;
 
@@ -353,6 +385,8 @@
 	if (msg_type == 1) {
 		/* it's a decode msg, calc buffer sizes */
 		r = radeon_uvd_cs_msg_decode(msg, buf_sizes);
+		/* calc image size (width * height) */
+		img_size = msg[6] * msg[7];
 		radeon_bo_kunmap(bo);
 		if (r)
 			return r;
@@ -364,8 +398,16 @@
 		radeon_bo_kunmap(bo);
 		return 0;
 	} else {
-		/* it's a create msg, no special handling needed */
+		/* it's a create msg, calc image size (width * height) */
+		img_size = msg[7] * msg[8];
 		radeon_bo_kunmap(bo);
+
+		if (msg_type != 0) {
+			DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
+			return -EINVAL;
+		}
+
+		/* it's a create msg, no special handling needed */
 	}
 
 	/* create or decode, validate the handle */
@@ -378,6 +420,7 @@
 	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
 		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
 			p->rdev->uvd.filp[i] = p->filp;
+			p->rdev->uvd.img_size[i] = img_size;
 			return 0;
 		}
 	}
@@ -388,7 +431,7 @@
 
 static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
 			       int data0, int data1,
-			       unsigned buf_sizes[])
+			       unsigned buf_sizes[], bool *has_msg_cmd)
 {
 	struct radeon_cs_chunk *relocs_chunk;
 	struct radeon_cs_reloc *reloc;
@@ -417,7 +460,7 @@
 
 	if (cmd < 0x4) {
 		if ((end - start) < buf_sizes[cmd]) {
-			DRM_ERROR("buffer to small (%d / %d)!\n",
+			DRM_ERROR("buffer (%d) to small (%d / %d)!\n", cmd,
 				  (unsigned)(end - start), buf_sizes[cmd]);
 			return -EINVAL;
 		}
@@ -442,9 +485,17 @@
 	}
 
 	if (cmd == 0) {
+		if (*has_msg_cmd) {
+			DRM_ERROR("More than one message in a UVD-IB!\n");
+			return -EINVAL;
+		}
+		*has_msg_cmd = true;
 		r = radeon_uvd_cs_msg(p, reloc->robj, offset, buf_sizes);
 		if (r)
 			return r;
+	} else if (!*has_msg_cmd) {
+		DRM_ERROR("Message needed before other commands are send!\n");
+		return -EINVAL;
 	}
 
 	return 0;
@@ -453,7 +504,8 @@
 static int radeon_uvd_cs_reg(struct radeon_cs_parser *p,
 			     struct radeon_cs_packet *pkt,
 			     int *data0, int *data1,
-			     unsigned buf_sizes[])
+			     unsigned buf_sizes[],
+			     bool *has_msg_cmd)
 {
 	int i, r;
 
@@ -467,7 +519,8 @@
 			*data1 = p->idx;
 			break;
 		case UVD_GPCOM_VCPU_CMD:
-			r = radeon_uvd_cs_reloc(p, *data0, *data1, buf_sizes);
+			r = radeon_uvd_cs_reloc(p, *data0, *data1,
+						buf_sizes, has_msg_cmd);
 			if (r)
 				return r;
 			break;
@@ -488,6 +541,9 @@
 	struct radeon_cs_packet pkt;
 	int r, data0 = 0, data1 = 0;
 
+	/* does the IB has a msg command */
+	bool has_msg_cmd = false;
+
 	/* minimum buffer sizes */
 	unsigned buf_sizes[] = {
 		[0x00000000]	=	2048,
@@ -514,8 +570,8 @@
 			return r;
 		switch (pkt.type) {
 		case RADEON_PACKET_TYPE0:
-			r = radeon_uvd_cs_reg(p, &pkt, &data0,
-					      &data1, buf_sizes);
+			r = radeon_uvd_cs_reg(p, &pkt, &data0, &data1,
+					      buf_sizes, &has_msg_cmd);
 			if (r)
 				return r;
 			break;
@@ -527,6 +583,12 @@
 			return -EINVAL;
 		}
 	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+
+	if (!has_msg_cmd) {
+		DRM_ERROR("UVD-IBs need a msg command!\n");
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -678,6 +740,34 @@
 	return radeon_uvd_send_msg(rdev, ring, bo, fence);
 }
 
+/**
+ * radeon_uvd_count_handles - count number of open streams
+ *
+ * @rdev: radeon_device pointer
+ * @sd: number of SD streams
+ * @hd: number of HD streams
+ *
+ * Count the number of open SD/HD streams as a hint for power mangement
+ */
+static void radeon_uvd_count_handles(struct radeon_device *rdev,
+				     unsigned *sd, unsigned *hd)
+{
+	unsigned i;
+
+	*sd = 0;
+	*hd = 0;
+
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		if (!atomic_read(&rdev->uvd.handles[i]))
+			continue;
+
+		if (rdev->uvd.img_size[i] >= 720*576)
+			++(*hd);
+		else
+			++(*sd);
+	}
+}
+
 static void radeon_uvd_idle_work_handler(struct work_struct *work)
 {
 	struct radeon_device *rdev =
@@ -685,10 +775,7 @@
 
 	if (radeon_fence_count_emitted(rdev, R600_RING_TYPE_UVD_INDEX) == 0) {
 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
-			mutex_lock(&rdev->pm.mutex);
-			rdev->pm.dpm.uvd_active = false;
-			mutex_unlock(&rdev->pm.mutex);
-			radeon_pm_compute_clocks(rdev);
+			radeon_dpm_enable_uvd(rdev, false);
 		} else {
 			radeon_set_uvd_clocks(rdev, 0, 0);
 		}
@@ -700,13 +787,25 @@
 
 void radeon_uvd_note_usage(struct radeon_device *rdev)
 {
+	bool streams_changed = false;
 	bool set_clocks = !cancel_delayed_work_sync(&rdev->uvd.idle_work);
 	set_clocks &= schedule_delayed_work(&rdev->uvd.idle_work,
 					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
-	if (set_clocks) {
+
+	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
+		unsigned hd = 0, sd = 0;
+		radeon_uvd_count_handles(rdev, &sd, &hd);
+		if ((rdev->pm.dpm.sd != sd) ||
+		    (rdev->pm.dpm.hd != hd)) {
+			rdev->pm.dpm.sd = sd;
+			rdev->pm.dpm.hd = hd;
+			streams_changed = true;
+		}
+	}
+
+	if (set_clocks || streams_changed) {
 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
-			/* XXX pick SD/HD/MVC */
-			radeon_dpm_enable_power_state(rdev, POWER_STATE_TYPE_INTERNAL_UVD);
+			radeon_dpm_enable_uvd(rdev, true);
 		} else {
 			radeon_set_uvd_clocks(rdev, 53300, 40000);
 		}
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index 233a9b9..b8074a8 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -174,10 +174,13 @@
 	/* FIXME: according to doc we should set HIDE_MMCFG_BAR=0,
 	 * AGPMODE30=0 & AGP30ENHANCED=0 in NB_CNTL */
 	if ((rdev->family == CHIP_RS690) || (rdev->family == CHIP_RS740)) {
-		WREG32_MC(RS480_MC_MISC_CNTL,
-			  (RS480_GART_INDEX_REG_EN | RS690_BLOCK_GFX_D3_EN));
+		tmp = RREG32_MC(RS480_MC_MISC_CNTL);
+		tmp |= RS480_GART_INDEX_REG_EN | RS690_BLOCK_GFX_D3_EN;
+		WREG32_MC(RS480_MC_MISC_CNTL, tmp);
 	} else {
-		WREG32_MC(RS480_MC_MISC_CNTL, RS480_GART_INDEX_REG_EN);
+		tmp = RREG32_MC(RS480_MC_MISC_CNTL);
+		tmp |= RS480_GART_INDEX_REG_EN;
+		WREG32_MC(RS480_MC_MISC_CNTL, tmp);
 	}
 	/* Enable gart */
 	WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | size_reg));
diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c
index 65e33f3..ab1f201 100644
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@@ -819,7 +819,7 @@
 		 POWERMODE1(calculate_memory_refresh_rate(rdev,
 							  pi->hw.sclks[R600_POWER_LEVEL_MEDIUM])) |
 		 POWERMODE2(calculate_memory_refresh_rate(rdev,
-							  pi->hw.sclks[R600_POWER_LEVEL_MEDIUM])) |
+							  pi->hw.sclks[R600_POWER_LEVEL_HIGH])) |
 		 POWERMODE3(calculate_memory_refresh_rate(rdev,
 							  pi->hw.sclks[R600_POWER_LEVEL_HIGH])));
 	WREG32(ARB_RFSH_RATE, arb_refresh_rate);
@@ -1182,10 +1182,10 @@
 	u32 tmp = RREG32(CG_DISPLAY_GAP_CNTL);
 
 	tmp &= ~(DISP1_GAP_MCHG_MASK | DISP2_GAP_MCHG_MASK);
-	if (RREG32(AVIVO_D1CRTC_CONTROL) & AVIVO_CRTC_EN) {
+	if (rdev->pm.dpm.new_active_crtcs & 1) {
 		tmp |= DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK);
 		tmp |= DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE);
-	} else if (RREG32(AVIVO_D2CRTC_CONTROL) & AVIVO_CRTC_EN) {
+	} else if (rdev->pm.dpm.new_active_crtcs & 2) {
 		tmp |= DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE);
 		tmp |= DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK);
 	} else {
@@ -1670,6 +1670,8 @@
 	struct radeon_ps *old_ps = rdev->pm.dpm.current_ps;
 	int ret;
 
+	pi->restricted_levels = 0;
+
 	rv6xx_set_uvd_clock_before_set_eng_clock(rdev, new_ps, old_ps);
 
 	rv6xx_clear_vc(rdev);
@@ -1756,6 +1758,8 @@
 
 	rv6xx_set_uvd_clock_after_set_eng_clock(rdev, new_ps, old_ps);
 
+	rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO;
+
 	return 0;
 }
 
@@ -1914,6 +1918,7 @@
 			 (power_state->v1.ucNonClockStateIndex *
 			  power_info->pplib.ucNonClockSize));
 		if (power_info->pplib.ucStateEntrySize - 1) {
+			u8 *idx;
 			ps = kzalloc(sizeof(struct rv6xx_ps), GFP_KERNEL);
 			if (ps == NULL) {
 				kfree(rdev->pm.dpm.ps);
@@ -1922,12 +1927,12 @@
 			rdev->pm.dpm.ps[i].ps_priv = ps;
 			rv6xx_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i],
 							 non_clock_info);
+			idx = (u8 *)&power_state->v1.ucClockStateIndices[0];
 			for (j = 0; j < (power_info->pplib.ucStateEntrySize - 1); j++) {
 				clock_info = (union pplib_clock_info *)
 					(mode_info->atom_context->bios + data_offset +
 					 le16_to_cpu(power_info->pplib.usClockInfoArrayOffset) +
-					 (power_state->v1.ucClockStateIndices[j] *
-					  power_info->pplib.ucClockInfoSize));
+					 (idx[j] * power_info->pplib.ucClockInfoSize));
 				rv6xx_parse_pplib_clock_info(rdev,
 							     &rdev->pm.dpm.ps[i], j,
 							     clock_info);
@@ -1940,9 +1945,7 @@
 
 int rv6xx_dpm_init(struct radeon_device *rdev)
 {
-	int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
-	uint16_t data_offset, size;
-	uint8_t frev, crev;
+	struct radeon_atom_ss ss;
 	struct atom_clock_dividers dividers;
 	struct rv6xx_power_info *pi;
 	int ret;
@@ -1985,16 +1988,18 @@
 
 	pi->gfx_clock_gating = true;
 
-	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
-                                   &frev, &crev, &data_offset)) {
-		pi->sclk_ss = true;
-		pi->mclk_ss = true;
+	pi->sclk_ss = radeon_atombios_get_asic_ss_info(rdev, &ss,
+						       ASIC_INTERNAL_ENGINE_SS, 0);
+	pi->mclk_ss = radeon_atombios_get_asic_ss_info(rdev, &ss,
+						       ASIC_INTERNAL_MEMORY_SS, 0);
+
+	/* Disable sclk ss, causes hangs on a lot of systems */
+	pi->sclk_ss = false;
+
+	if (pi->sclk_ss || pi->mclk_ss)
 		pi->dynamic_ss = true;
-	} else {
-		pi->sclk_ss = false;
-		pi->mclk_ss = false;
+	else
 		pi->dynamic_ss = false;
-	}
 
 	pi->dynamic_pcie_gen2 = true;
 
@@ -2085,3 +2090,34 @@
 	else
 		return requested_state->high.mclk;
 }
+
+int rv6xx_dpm_force_performance_level(struct radeon_device *rdev,
+				      enum radeon_dpm_forced_level level)
+{
+	struct rv6xx_power_info *pi = rv6xx_get_pi(rdev);
+
+	if (level == RADEON_DPM_FORCED_LEVEL_HIGH) {
+		pi->restricted_levels = 3;
+	} else if (level == RADEON_DPM_FORCED_LEVEL_LOW) {
+		pi->restricted_levels = 2;
+	} else {
+		pi->restricted_levels = 0;
+	}
+
+	rv6xx_clear_vc(rdev);
+	r600_power_level_enable(rdev, R600_POWER_LEVEL_LOW, true);
+	r600_set_at(rdev, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
+	r600_wait_for_power_level(rdev, R600_POWER_LEVEL_LOW);
+	r600_power_level_enable(rdev, R600_POWER_LEVEL_HIGH, false);
+	r600_power_level_enable(rdev, R600_POWER_LEVEL_MEDIUM, false);
+	rv6xx_enable_medium(rdev);
+	rv6xx_enable_high(rdev);
+	if (pi->restricted_levels == 3)
+		r600_power_level_enable(rdev, R600_POWER_LEVEL_LOW, false);
+	rv6xx_program_vc(rdev);
+	rv6xx_program_at(rdev);
+
+	rdev->pm.dpm.forced_level = level;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 30ea14e..9f58467 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -744,10 +744,10 @@
 						 (const u32)ARRAY_SIZE(r7xx_golden_dyn_gpr_registers));
 		radeon_program_register_sequence(rdev,
 						 rv730_golden_registers,
-						 (const u32)ARRAY_SIZE(rv770_golden_registers));
+						 (const u32)ARRAY_SIZE(rv730_golden_registers));
 		radeon_program_register_sequence(rdev,
 						 rv730_mgcg_init,
-						 (const u32)ARRAY_SIZE(rv770_mgcg_init));
+						 (const u32)ARRAY_SIZE(rv730_mgcg_init));
 		break;
 	case CHIP_RV710:
 		radeon_program_register_sequence(rdev,
@@ -758,18 +758,18 @@
 						 (const u32)ARRAY_SIZE(r7xx_golden_dyn_gpr_registers));
 		radeon_program_register_sequence(rdev,
 						 rv710_golden_registers,
-						 (const u32)ARRAY_SIZE(rv770_golden_registers));
+						 (const u32)ARRAY_SIZE(rv710_golden_registers));
 		radeon_program_register_sequence(rdev,
 						 rv710_mgcg_init,
-						 (const u32)ARRAY_SIZE(rv770_mgcg_init));
+						 (const u32)ARRAY_SIZE(rv710_mgcg_init));
 		break;
 	case CHIP_RV740:
 		radeon_program_register_sequence(rdev,
 						 rv740_golden_registers,
-						 (const u32)ARRAY_SIZE(rv770_golden_registers));
+						 (const u32)ARRAY_SIZE(rv740_golden_registers));
 		radeon_program_register_sequence(rdev,
 						 rv740_mgcg_init,
-						 (const u32)ARRAY_SIZE(rv770_mgcg_init));
+						 (const u32)ARRAY_SIZE(rv740_mgcg_init));
 		break;
 	default:
 		break;
@@ -801,103 +801,6 @@
 	return reference_clock;
 }
 
-int rv770_uvd_resume(struct radeon_device *rdev)
-{
-	uint64_t addr;
-	uint32_t chip_id, size;
-	int r;
-
-	r = radeon_uvd_resume(rdev);
-	if (r)
-		return r;
-
-	/* programm the VCPU memory controller bits 0-27 */
-	addr = rdev->uvd.gpu_addr >> 3;
-	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3;
-	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
-	WREG32(UVD_VCPU_CACHE_SIZE0, size);
-
-	addr += size;
-	size = RADEON_UVD_STACK_SIZE >> 3;
-	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
-	WREG32(UVD_VCPU_CACHE_SIZE1, size);
-
-	addr += size;
-	size = RADEON_UVD_HEAP_SIZE >> 3;
-	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
-	WREG32(UVD_VCPU_CACHE_SIZE2, size);
-
-	/* bits 28-31 */
-	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
-	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
-
-	/* bits 32-39 */
-	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
-	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
-
-	/* tell firmware which hardware it is running on */
-	switch (rdev->family) {
-	default:
-		return -EINVAL;
-	case CHIP_RV710:
-		chip_id = 0x01000005;
-		break;
-	case CHIP_RV730:
-		chip_id = 0x01000006;
-		break;
-	case CHIP_RV740:
-		chip_id = 0x01000007;
-		break;
-	case CHIP_CYPRESS:
-	case CHIP_HEMLOCK:
-		chip_id = 0x01000008;
-		break;
-	case CHIP_JUNIPER:
-		chip_id = 0x01000009;
-		break;
-	case CHIP_REDWOOD:
-		chip_id = 0x0100000a;
-		break;
-	case CHIP_CEDAR:
-		chip_id = 0x0100000b;
-		break;
-	case CHIP_SUMO:
-	case CHIP_SUMO2:
-		chip_id = 0x0100000c;
-		break;
-	case CHIP_PALM:
-		chip_id = 0x0100000e;
-		break;
-	case CHIP_CAYMAN:
-		chip_id = 0x0100000f;
-		break;
-	case CHIP_BARTS:
-		chip_id = 0x01000010;
-		break;
-	case CHIP_TURKS:
-		chip_id = 0x01000011;
-		break;
-	case CHIP_CAICOS:
-		chip_id = 0x01000012;
-		break;
-	case CHIP_TAHITI:
-		chip_id = 0x01000014;
-		break;
-	case CHIP_VERDE:
-		chip_id = 0x01000015;
-		break;
-	case CHIP_PITCAIRN:
-		chip_id = 0x01000016;
-		break;
-	case CHIP_ARUBA:
-		chip_id = 0x01000017;
-		break;
-	}
-	WREG32(UVD_VCPU_CHIP_ID, chip_id);
-
-	return 0;
-}
-
 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
@@ -1747,80 +1650,6 @@
 	return 0;
 }
 
-/**
- * rv770_copy_dma - copy pages using the DMA engine
- *
- * @rdev: radeon_device pointer
- * @src_offset: src GPU address
- * @dst_offset: dst GPU address
- * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
- *
- * Copy GPU paging using the DMA engine (r7xx).
- * Used by the radeon ttm implementation to move pages if
- * registered as the asic copy callback.
- */
-int rv770_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset, uint64_t dst_offset,
-		  unsigned num_gpu_pages,
-		  struct radeon_fence **fence)
-{
-	struct radeon_semaphore *sem = NULL;
-	int ring_index = rdev->asic->copy.dma_ring_index;
-	struct radeon_ring *ring = &rdev->ring[ring_index];
-	u32 size_in_dw, cur_size_in_dw;
-	int i, num_loops;
-	int r = 0;
-
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
-	}
-
-	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
-	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
-	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
-	}
-
-	if (radeon_fence_need_sync(*fence, ring->idx)) {
-		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-					    ring->idx);
-		radeon_fence_note_sync(*fence, ring->idx);
-	} else {
-		radeon_semaphore_free(rdev, &sem, NULL);
-	}
-
-	for (i = 0; i < num_loops; i++) {
-		cur_size_in_dw = size_in_dw;
-		if (cur_size_in_dw > 0xFFFF)
-			cur_size_in_dw = 0xFFFF;
-		size_in_dw -= cur_size_in_dw;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
-		radeon_ring_write(ring, dst_offset & 0xfffffffc);
-		radeon_ring_write(ring, src_offset & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
-		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
-		src_offset += cur_size_in_dw * 4;
-		dst_offset += cur_size_in_dw * 4;
-	}
-
-	r = radeon_fence_emit(rdev, fence, ring->idx);
-	if (r) {
-		radeon_ring_unlock_undo(rdev, ring);
-		return r;
-	}
-
-	radeon_ring_unlock_commit(rdev, ring);
-	radeon_semaphore_free(rdev, &sem, *fence);
-
-	return r;
-}
-
 static int rv770_startup(struct radeon_device *rdev)
 {
 	struct radeon_ring *ring;
@@ -1829,6 +1658,13 @@
 	/* enable pcie gen2 link */
 	rv770_pcie_gen2_enable(rdev);
 
+	/* scratch needs to be initialized before MC */
+	r = r600_vram_scratch_init(rdev);
+	if (r)
+		return r;
+
+	rv770_mc_program(rdev);
+
 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw) {
 		r = r600_init_microcode(rdev);
 		if (r) {
@@ -1837,11 +1673,6 @@
 		}
 	}
 
-	r = r600_vram_scratch_init(rdev);
-	if (r)
-		return r;
-
-	rv770_mc_program(rdev);
 	if (rdev->flags & RADEON_IS_AGP) {
 		rv770_agp_enable(rdev);
 	} else {
@@ -1851,12 +1682,6 @@
 	}
 
 	rv770_gpu_init(rdev);
-	r = r600_blit_init(rdev);
-	if (r) {
-		r600_blit_fini(rdev);
-		rdev->asic->copy.copy = NULL;
-		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
-	}
 
 	/* allocate wb buffer */
 	r = radeon_wb_init(rdev);
@@ -1875,7 +1700,7 @@
 		return r;
 	}
 
-	r = rv770_uvd_resume(rdev);
+	r = uvd_v2_2_resume(rdev);
 	if (!r) {
 		r = radeon_fence_driver_start_ring(rdev,
 						   R600_RING_TYPE_UVD_INDEX);
@@ -1904,14 +1729,14 @@
 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
-			     0, 0xfffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r)
 		return r;
 
 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
 			     DMA_RB_RPTR, DMA_RB_WPTR,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
+			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0));
 	if (r)
 		return r;
 
@@ -1928,12 +1753,11 @@
 
 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
 	if (ring->ring_size) {
-		r = radeon_ring_init(rdev, ring, ring->ring_size,
-				     R600_WB_UVD_RPTR_OFFSET,
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-				     0, 0xfffff, RADEON_CP_PACKET2);
+				     RADEON_CP_PACKET2);
 		if (!r)
-			r = r600_uvd_init(rdev);
+			r = uvd_v1_0_init(rdev);
 
 		if (r)
 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
@@ -1983,6 +1807,7 @@
 int rv770_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
+	uvd_v1_0_fini(rdev);
 	radeon_uvd_suspend(rdev);
 	r700_cp_stop(rdev);
 	r600_dma_stop(rdev);
@@ -2090,7 +1915,6 @@
 
 void rv770_fini(struct radeon_device *rdev)
 {
-	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
 	r600_dma_fini(rdev);
 	r600_irq_fini(rdev);
@@ -2098,6 +1922,7 @@
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	rv770_pcie_gart_fini(rdev);
+	uvd_v1_0_fini(rdev);
 	radeon_uvd_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
new file mode 100644
index 0000000..f9b02e3
--- /dev/null
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "rv770d.h"
+
+/**
+ * rv770_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (r7xx).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int rv770_copy_dma(struct radeon_device *rdev,
+		  uint64_t src_offset, uint64_t dst_offset,
+		  unsigned num_gpu_pages,
+		  struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_dw, cur_size_in_dw;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFF);
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_dw = size_in_dw;
+		if (cur_size_in_dw > 0xFFFF)
+			cur_size_in_dw = 0xFFFF;
+		size_in_dw -= cur_size_in_dw;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
+		radeon_ring_write(ring, dst_offset & 0xfffffffc);
+		radeon_ring_write(ring, src_offset & 0xfffffffc);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_dw * 4;
+		dst_offset += cur_size_in_dw * 4;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index 2d34792..8cbb85d 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -2294,6 +2294,7 @@
 			 (power_state->v1.ucNonClockStateIndex *
 			  power_info->pplib.ucNonClockSize));
 		if (power_info->pplib.ucStateEntrySize - 1) {
+			u8 *idx;
 			ps = kzalloc(sizeof(struct rv7xx_ps), GFP_KERNEL);
 			if (ps == NULL) {
 				kfree(rdev->pm.dpm.ps);
@@ -2303,12 +2304,12 @@
 			rv7xx_parse_pplib_non_clock_info(rdev, &rdev->pm.dpm.ps[i],
 							 non_clock_info,
 							 power_info->pplib.ucNonClockSize);
+			idx = (u8 *)&power_state->v1.ucClockStateIndices[0];
 			for (j = 0; j < (power_info->pplib.ucStateEntrySize - 1); j++) {
 				clock_info = (union pplib_clock_info *)
 					(mode_info->atom_context->bios + data_offset +
 					 le16_to_cpu(power_info->pplib.usClockInfoArrayOffset) +
-					 (power_state->v1.ucClockStateIndices[j] *
-					  power_info->pplib.ucClockInfoSize));
+					 (idx[j] * power_info->pplib.ucClockInfoSize));
 				rv7xx_parse_pplib_clock_info(rdev,
 							     &rdev->pm.dpm.ps[i], j,
 							     clock_info);
@@ -2319,12 +2320,25 @@
 	return 0;
 }
 
+void rv770_get_engine_memory_ss(struct radeon_device *rdev)
+{
+	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
+	struct radeon_atom_ss ss;
+
+	pi->sclk_ss = radeon_atombios_get_asic_ss_info(rdev, &ss,
+						       ASIC_INTERNAL_ENGINE_SS, 0);
+	pi->mclk_ss = radeon_atombios_get_asic_ss_info(rdev, &ss,
+						       ASIC_INTERNAL_MEMORY_SS, 0);
+
+	if (pi->sclk_ss || pi->mclk_ss)
+		pi->dynamic_ss = true;
+	else
+		pi->dynamic_ss = false;
+}
+
 int rv770_dpm_init(struct radeon_device *rdev)
 {
 	struct rv7xx_power_info *pi;
-	int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
-	uint16_t data_offset, size;
-	uint8_t frev, crev;
 	struct atom_clock_dividers dividers;
 	int ret;
 
@@ -2369,16 +2383,7 @@
 	pi->mvdd_control =
 		radeon_atom_is_voltage_gpio(rdev, SET_VOLTAGE_TYPE_ASIC_MVDDC, 0);
 
-	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
-                                   &frev, &crev, &data_offset)) {
-		pi->sclk_ss = true;
-		pi->mclk_ss = true;
-		pi->dynamic_ss = true;
-	} else {
-		pi->sclk_ss = false;
-		pi->mclk_ss = false;
-		pi->dynamic_ss = false;
-	}
+	rv770_get_engine_memory_ss(rdev);
 
 	pi->asi = RV770_ASI_DFLT;
 	pi->pasi = RV770_HASI_DFLT;
@@ -2393,8 +2398,7 @@
 
 	pi->dynamic_pcie_gen2 = true;
 
-	if (pi->gfx_clock_gating &&
-	    (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE))
+	if (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE)
 		pi->thermal_protection = true;
 	else
 		pi->thermal_protection = false;
@@ -2514,8 +2518,16 @@
 bool rv770_dpm_vblank_too_short(struct radeon_device *rdev)
 {
 	u32 vblank_time = r600_dpm_get_vblank_time(rdev);
+	u32 switch_limit = 300;
 
-	if (vblank_time < 300)
+	/* quirks */
+	/* ASUS K70AF */
+	if ((rdev->pdev->device == 0x9553) &&
+	    (rdev->pdev->subsystem_vendor == 0x1043) &&
+	    (rdev->pdev->subsystem_device == 0x1c42))
+		switch_limit = 200;
+
+	if (vblank_time < switch_limit)
 		return true;
 	else
 		return false;
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.h b/drivers/gpu/drm/radeon/rv770_dpm.h
index 96b1b2a..9244eff 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.h
+++ b/drivers/gpu/drm/radeon/rv770_dpm.h
@@ -275,6 +275,7 @@
 void rv770_set_uvd_clock_after_set_eng_clock(struct radeon_device *rdev,
 					     struct radeon_ps *new_ps,
 					     struct radeon_ps *old_ps);
+void rv770_get_engine_memory_ss(struct radeon_device *rdev);
 
 /* smc */
 int rv770_read_smc_soft_register(struct radeon_device *rdev,
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index 6bef2b7..9fe60e5 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -971,7 +971,21 @@
 #       define TARGET_LINK_SPEED_MASK                     (0xf << 0)
 #       define SELECTABLE_DEEMPHASIS                      (1 << 6)
 
+/*
+ * PM4
+ */
+#define PACKET0(reg, n)	((RADEON_PACKET_TYPE0 << 30) |			\
+			 (((reg) >> 2) & 0xFFFF) |			\
+			 ((n) & 0x3FFF) << 16)
+#define PACKET3(op, n)	((RADEON_PACKET_TYPE3 << 30) |			\
+			 (((op) & 0xFF) << 8) |				\
+			 ((n) & 0x3FFF) << 16)
+
 /* UVD */
+#define UVD_GPCOM_VCPU_CMD				0xef0c
+#define UVD_GPCOM_VCPU_DATA0				0xef10
+#define UVD_GPCOM_VCPU_DATA1				0xef14
+
 #define UVD_LMI_EXT40_ADDR				0xf498
 #define UVD_VCPU_CHIP_ID				0xf4d4
 #define UVD_VCPU_CACHE_OFFSET0				0xf4d8
@@ -985,4 +999,6 @@
 #define UVD_RBC_RB_RPTR					0xf690
 #define UVD_RBC_RB_WPTR					0xf694
 
+#define UVD_CONTEXT_ID					0xf6f4
+
 #endif
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index d325280..3e23b75 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -68,6 +68,8 @@
 
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
 static void si_program_aspm(struct radeon_device *rdev);
+extern void sumo_rlc_fini(struct radeon_device *rdev);
+extern int sumo_rlc_init(struct radeon_device *rdev);
 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
 extern void r600_ih_ring_fini(struct radeon_device *rdev);
 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
@@ -76,6 +78,11 @@
 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
+extern void si_dma_vm_set_page(struct radeon_device *rdev,
+			       struct radeon_ib *ib,
+			       uint64_t pe,
+			       uint64_t addr, unsigned count,
+			       uint32_t incr, uint32_t flags);
 
 static const u32 verde_rlc_save_restore_register_list[] =
 {
@@ -1663,9 +1670,13 @@
 
 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
-	if (err)
-		goto out;
-	if (rdev->smc_fw->size != smc_req_size) {
+	if (err) {
+		printk(KERN_ERR
+		       "smc: error loading firmware \"%s\"\n",
+		       fw_name);
+		release_firmware(rdev->smc_fw);
+		rdev->smc_fw = NULL;
+	} else if (rdev->smc_fw->size != smc_req_size) {
 		printk(KERN_ERR
 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
 		       rdev->smc_fw->size, fw_name);
@@ -1700,7 +1711,8 @@
 				   struct drm_display_mode *mode,
 				   struct drm_display_mode *other_mode)
 {
-	u32 tmp;
+	u32 tmp, buffer_alloc, i;
+	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
 	/*
 	 * Line Buffer Setup
 	 * There are 3 line buffers, each one shared by 2 display controllers.
@@ -1715,16 +1727,30 @@
 	 * non-linked crtcs for maximum line buffer allocation.
 	 */
 	if (radeon_crtc->base.enabled && mode) {
-		if (other_mode)
+		if (other_mode) {
 			tmp = 0; /* 1/2 */
-		else
+			buffer_alloc = 1;
+		} else {
 			tmp = 2; /* whole */
-	} else
+			buffer_alloc = 2;
+		}
+	} else {
 		tmp = 0;
+		buffer_alloc = 0;
+	}
 
 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
 	       DC_LB_MEMORY_CONFIG(tmp));
 
+	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
+	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
+		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
+			break;
+		udelay(1);
+	}
+
 	if (radeon_crtc->base.enabled && mode) {
 		switch (tmp) {
 		case 0:
@@ -3360,17 +3386,6 @@
 	u32 rb_bufsz;
 	int r;
 
-	/* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */
-	WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP |
-				 SOFT_RESET_PA |
-				 SOFT_RESET_VGT |
-				 SOFT_RESET_SPI |
-				 SOFT_RESET_SX));
-	RREG32(GRBM_SOFT_RESET);
-	mdelay(15);
-	WREG32(GRBM_SOFT_RESET, 0);
-	RREG32(GRBM_SOFT_RESET);
-
 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
 
@@ -3383,8 +3398,8 @@
 	/* ring 0 - compute and gfx */
 	/* Set ring buffer size */
 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
-	rb_bufsz = drm_order(ring->ring_size / 8);
-	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+	rb_bufsz = order_base_2(ring->ring_size / 8);
+	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
 #endif
@@ -3416,8 +3431,8 @@
 	/* ring1  - compute only */
 	/* Set ring buffer size */
 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
-	rb_bufsz = drm_order(ring->ring_size / 8);
-	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+	rb_bufsz = order_base_2(ring->ring_size / 8);
+	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
 #endif
@@ -3442,8 +3457,8 @@
 	/* ring2 - compute only */
 	/* Set ring buffer size */
 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
-	rb_bufsz = drm_order(ring->ring_size / 8);
-	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
+	rb_bufsz = order_base_2(ring->ring_size / 8);
+	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
 	tmp |= BUF_SWAP_32BIT;
 #endif
@@ -3489,7 +3504,7 @@
 	return 0;
 }
 
-static u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
+u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
 {
 	u32 reset_mask = 0;
 	u32 tmp;
@@ -3738,34 +3753,6 @@
 	return radeon_ring_test_lockup(rdev, ring);
 }
 
-/**
- * si_dma_is_lockup - Check if the DMA engine is locked up
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Check if the async DMA engine is locked up.
- * Returns true if the engine appears to be locked up, false if not.
- */
-bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	u32 reset_mask = si_gpu_check_soft_reset(rdev);
-	u32 mask;
-
-	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
-		mask = RADEON_RESET_DMA;
-	else
-		mask = RADEON_RESET_DMA1;
-
-	if (!(reset_mask & mask)) {
-		radeon_ring_lockup_update(ring);
-		return false;
-	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
-	return radeon_ring_test_lockup(rdev, ring);
-}
-
 /* MC */
 static void si_mc_program(struct radeon_device *rdev)
 {
@@ -4079,13 +4066,64 @@
 	return 0;
 }
 
+static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
+{
+	u32 start_reg, reg, i;
+	u32 command = ib[idx + 4];
+	u32 info = ib[idx + 1];
+	u32 idx_value = ib[idx];
+	if (command & PACKET3_CP_DMA_CMD_SAS) {
+		/* src address space is register */
+		if (((info & 0x60000000) >> 29) == 0) {
+			start_reg = idx_value << 2;
+			if (command & PACKET3_CP_DMA_CMD_SAIC) {
+				reg = start_reg;
+				if (!si_vm_reg_valid(reg)) {
+					DRM_ERROR("CP DMA Bad SRC register\n");
+					return -EINVAL;
+				}
+			} else {
+				for (i = 0; i < (command & 0x1fffff); i++) {
+					reg = start_reg + (4 * i);
+					if (!si_vm_reg_valid(reg)) {
+						DRM_ERROR("CP DMA Bad SRC register\n");
+						return -EINVAL;
+					}
+				}
+			}
+		}
+	}
+	if (command & PACKET3_CP_DMA_CMD_DAS) {
+		/* dst address space is register */
+		if (((info & 0x00300000) >> 20) == 0) {
+			start_reg = ib[idx + 2];
+			if (command & PACKET3_CP_DMA_CMD_DAIC) {
+				reg = start_reg;
+				if (!si_vm_reg_valid(reg)) {
+					DRM_ERROR("CP DMA Bad DST register\n");
+					return -EINVAL;
+				}
+			} else {
+				for (i = 0; i < (command & 0x1fffff); i++) {
+					reg = start_reg + (4 * i);
+				if (!si_vm_reg_valid(reg)) {
+						DRM_ERROR("CP DMA Bad DST register\n");
+						return -EINVAL;
+					}
+				}
+			}
+		}
+	}
+	return 0;
+}
+
 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
 				   u32 *ib, struct radeon_cs_packet *pkt)
 {
+	int r;
 	u32 idx = pkt->idx + 1;
 	u32 idx_value = ib[idx];
 	u32 start_reg, end_reg, reg, i;
-	u32 command, info;
 
 	switch (pkt->opcode) {
 	case PACKET3_NOP:
@@ -4186,50 +4224,9 @@
 		}
 		break;
 	case PACKET3_CP_DMA:
-		command = ib[idx + 4];
-		info = ib[idx + 1];
-		if (command & PACKET3_CP_DMA_CMD_SAS) {
-			/* src address space is register */
-			if (((info & 0x60000000) >> 29) == 0) {
-				start_reg = idx_value << 2;
-				if (command & PACKET3_CP_DMA_CMD_SAIC) {
-					reg = start_reg;
-					if (!si_vm_reg_valid(reg)) {
-						DRM_ERROR("CP DMA Bad SRC register\n");
-						return -EINVAL;
-					}
-				} else {
-					for (i = 0; i < (command & 0x1fffff); i++) {
-						reg = start_reg + (4 * i);
-						if (!si_vm_reg_valid(reg)) {
-							DRM_ERROR("CP DMA Bad SRC register\n");
-							return -EINVAL;
-						}
-					}
-				}
-			}
-		}
-		if (command & PACKET3_CP_DMA_CMD_DAS) {
-			/* dst address space is register */
-			if (((info & 0x00300000) >> 20) == 0) {
-				start_reg = ib[idx + 2];
-				if (command & PACKET3_CP_DMA_CMD_DAIC) {
-					reg = start_reg;
-					if (!si_vm_reg_valid(reg)) {
-						DRM_ERROR("CP DMA Bad DST register\n");
-						return -EINVAL;
-					}
-				} else {
-					for (i = 0; i < (command & 0x1fffff); i++) {
-						reg = start_reg + (4 * i);
-						if (!si_vm_reg_valid(reg)) {
-							DRM_ERROR("CP DMA Bad DST register\n");
-							return -EINVAL;
-						}
-					}
-				}
-			}
-		}
+		r = si_vm_packet3_cp_dma_check(ib, idx);
+		if (r)
+			return r;
 		break;
 	default:
 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
@@ -4241,6 +4238,7 @@
 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
 				       u32 *ib, struct radeon_cs_packet *pkt)
 {
+	int r;
 	u32 idx = pkt->idx + 1;
 	u32 idx_value = ib[idx];
 	u32 start_reg, reg, i;
@@ -4313,6 +4311,11 @@
 				return -EINVAL;
 		}
 		break;
+	case PACKET3_CP_DMA:
+		r = si_vm_packet3_cp_dma_check(ib, idx);
+		if (r)
+			return r;
+		break;
 	default:
 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
 		return -EINVAL;
@@ -4704,58 +4707,7 @@
 		}
 	} else {
 		/* DMA */
-		if (flags & RADEON_VM_PAGE_SYSTEM) {
-			while (count) {
-				ndw = count * 2;
-				if (ndw > 0xFFFFE)
-					ndw = 0xFFFFE;
-
-				/* for non-physically contiguous pages (system) */
-				ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
-				ib->ptr[ib->length_dw++] = pe;
-				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
-					if (flags & RADEON_VM_PAGE_SYSTEM) {
-						value = radeon_vm_map_gart(rdev, addr);
-						value &= 0xFFFFFFFFFFFFF000ULL;
-					} else if (flags & RADEON_VM_PAGE_VALID) {
-						value = addr;
-					} else {
-						value = 0;
-					}
-					addr += incr;
-					value |= r600_flags;
-					ib->ptr[ib->length_dw++] = value;
-					ib->ptr[ib->length_dw++] = upper_32_bits(value);
-				}
-			}
-		} else {
-			while (count) {
-				ndw = count * 2;
-				if (ndw > 0xFFFFE)
-					ndw = 0xFFFFE;
-
-				if (flags & RADEON_VM_PAGE_VALID)
-					value = addr;
-				else
-					value = 0;
-				/* for physically contiguous pages (vram) */
-				ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
-				ib->ptr[ib->length_dw++] = pe; /* dst addr */
-				ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
-				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
-				ib->ptr[ib->length_dw++] = 0;
-				ib->ptr[ib->length_dw++] = value; /* value */
-				ib->ptr[ib->length_dw++] = upper_32_bits(value);
-				ib->ptr[ib->length_dw++] = incr; /* increment size */
-				ib->ptr[ib->length_dw++] = 0;
-				pe += ndw * 4;
-				addr += (ndw / 2) * incr;
-				count -= ndw / 2;
-			}
-		}
-		while (ib->length_dw & 0x7)
-			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
+		si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
 	}
 }
 
@@ -4802,32 +4754,6 @@
 	radeon_ring_write(ring, 0x0);
 }
 
-void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
-{
-	struct radeon_ring *ring = &rdev->ring[ridx];
-
-	if (vm == NULL)
-		return;
-
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
-	if (vm->id < 8) {
-		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
-	} else {
-		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
-	}
-	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
-
-	/* flush hdp cache */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
-	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
-	radeon_ring_write(ring, 1);
-
-	/* bits 0-7 are the VM contexts0-7 */
-	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
-	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
-	radeon_ring_write(ring, 1 << vm->id);
-}
-
 /*
  *  Power and clock gating
  */
@@ -4895,7 +4821,7 @@
 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
 }
 
-static void si_init_uvd_internal_cg(struct radeon_device *rdev)
+void si_init_uvd_internal_cg(struct radeon_device *rdev)
 {
 	bool hw_mode = true;
 
@@ -4938,7 +4864,7 @@
 	u32 data, orig;
 
 	orig = data = RREG32(DMA_PG);
-	if (enable)
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
 		data |= PG_CNTL_ENABLE;
 	else
 		data &= ~PG_CNTL_ENABLE;
@@ -4962,7 +4888,7 @@
 {
 	u32 tmp;
 
-	if (enable) {
+	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
 		WREG32(RLC_TTOP_D, tmp);
 
@@ -5065,9 +4991,9 @@
 
 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
 
-	si_enable_gui_idle_interrupt(rdev, enable);
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
+		si_enable_gui_idle_interrupt(rdev, true);
 
-	if (enable) {
 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
 
 		tmp = si_halt_rlc(rdev);
@@ -5084,6 +5010,8 @@
 
 		data |= CGCG_EN | CGLS_EN;
 	} else {
+		si_enable_gui_idle_interrupt(rdev, false);
+
 		RREG32(CB_CGTT_SCLK_CTRL);
 		RREG32(CB_CGTT_SCLK_CTRL);
 		RREG32(CB_CGTT_SCLK_CTRL);
@@ -5101,16 +5029,18 @@
 {
 	u32 data, orig, tmp = 0;
 
-	if (enable) {
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
 		orig = data = RREG32(CGTS_SM_CTRL_REG);
 		data = 0x96940200;
 		if (orig != data)
 			WREG32(CGTS_SM_CTRL_REG, data);
 
-		orig = data = RREG32(CP_MEM_SLP_CNTL);
-		data |= CP_MEM_LS_EN;
-		if (orig != data)
-			WREG32(CP_MEM_SLP_CNTL, data);
+		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
+			orig = data = RREG32(CP_MEM_SLP_CNTL);
+			data |= CP_MEM_LS_EN;
+			if (orig != data)
+				WREG32(CP_MEM_SLP_CNTL, data);
+		}
 
 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
 		data &= 0xffffffc0;
@@ -5155,7 +5085,7 @@
 {
 	u32 orig, data, tmp;
 
-	if (enable) {
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
 		tmp |= 0x3fff;
 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
@@ -5203,7 +5133,7 @@
 
 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
 		orig = data = RREG32(mc_cg_registers[i]);
-		if (enable)
+		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
 			data |= MC_LS_ENABLE;
 		else
 			data &= ~MC_LS_ENABLE;
@@ -5212,45 +5142,276 @@
 	}
 }
 
+static void si_enable_mc_mgcg(struct radeon_device *rdev,
+			       bool enable)
+{
+	int i;
+	u32 orig, data;
+
+	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
+		orig = data = RREG32(mc_cg_registers[i]);
+		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
+			data |= MC_CG_ENABLE;
+		else
+			data &= ~MC_CG_ENABLE;
+		if (data != orig)
+			WREG32(mc_cg_registers[i], data);
+	}
+}
+
+static void si_enable_dma_mgcg(struct radeon_device *rdev,
+			       bool enable)
+{
+	u32 orig, data, offset;
+	int i;
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
+		for (i = 0; i < 2; i++) {
+			if (i == 0)
+				offset = DMA0_REGISTER_OFFSET;
+			else
+				offset = DMA1_REGISTER_OFFSET;
+			orig = data = RREG32(DMA_POWER_CNTL + offset);
+			data &= ~MEM_POWER_OVERRIDE;
+			if (data != orig)
+				WREG32(DMA_POWER_CNTL + offset, data);
+			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
+		}
+	} else {
+		for (i = 0; i < 2; i++) {
+			if (i == 0)
+				offset = DMA0_REGISTER_OFFSET;
+			else
+				offset = DMA1_REGISTER_OFFSET;
+			orig = data = RREG32(DMA_POWER_CNTL + offset);
+			data |= MEM_POWER_OVERRIDE;
+			if (data != orig)
+				WREG32(DMA_POWER_CNTL + offset, data);
+
+			orig = data = RREG32(DMA_CLK_CTRL + offset);
+			data = 0xff000000;
+			if (data != orig)
+				WREG32(DMA_CLK_CTRL + offset, data);
+		}
+	}
+}
+
+static void si_enable_bif_mgls(struct radeon_device *rdev,
+			       bool enable)
+{
+	u32 orig, data;
+
+	orig = data = RREG32_PCIE(PCIE_CNTL2);
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
+		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
+			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
+	else
+		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
+			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
+
+	if (orig != data)
+		WREG32_PCIE(PCIE_CNTL2, data);
+}
+
+static void si_enable_hdp_mgcg(struct radeon_device *rdev,
+			       bool enable)
+{
+	u32 orig, data;
+
+	orig = data = RREG32(HDP_HOST_PATH_CNTL);
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
+		data &= ~CLOCK_GATING_DIS;
+	else
+		data |= CLOCK_GATING_DIS;
+
+	if (orig != data)
+		WREG32(HDP_HOST_PATH_CNTL, data);
+}
+
+static void si_enable_hdp_ls(struct radeon_device *rdev,
+			     bool enable)
+{
+	u32 orig, data;
+
+	orig = data = RREG32(HDP_MEM_POWER_LS);
+
+	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
+		data |= HDP_LS_ENABLE;
+	else
+		data &= ~HDP_LS_ENABLE;
+
+	if (orig != data)
+		WREG32(HDP_MEM_POWER_LS, data);
+}
+
+void si_update_cg(struct radeon_device *rdev,
+		  u32 block, bool enable)
+{
+	if (block & RADEON_CG_BLOCK_GFX) {
+		/* order matters! */
+		if (enable) {
+			si_enable_mgcg(rdev, true);
+			si_enable_cgcg(rdev, true);
+		} else {
+			si_enable_cgcg(rdev, false);
+			si_enable_mgcg(rdev, false);
+		}
+	}
+
+	if (block & RADEON_CG_BLOCK_MC) {
+		si_enable_mc_mgcg(rdev, enable);
+		si_enable_mc_ls(rdev, enable);
+	}
+
+	if (block & RADEON_CG_BLOCK_SDMA) {
+		si_enable_dma_mgcg(rdev, enable);
+	}
+
+	if (block & RADEON_CG_BLOCK_BIF) {
+		si_enable_bif_mgls(rdev, enable);
+	}
+
+	if (block & RADEON_CG_BLOCK_UVD) {
+		if (rdev->has_uvd) {
+			si_enable_uvd_mgcg(rdev, enable);
+		}
+	}
+
+	if (block & RADEON_CG_BLOCK_HDP) {
+		si_enable_hdp_mgcg(rdev, enable);
+		si_enable_hdp_ls(rdev, enable);
+	}
+}
 
 static void si_init_cg(struct radeon_device *rdev)
 {
-	bool has_uvd = true;
-
-	si_enable_mgcg(rdev, true);
-	si_enable_cgcg(rdev, true);
-	/* disable MC LS on Tahiti */
-	if (rdev->family == CHIP_TAHITI)
-		si_enable_mc_ls(rdev, false);
-	if (has_uvd) {
-		si_enable_uvd_mgcg(rdev, true);
+	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			    RADEON_CG_BLOCK_MC |
+			    RADEON_CG_BLOCK_SDMA |
+			    RADEON_CG_BLOCK_BIF |
+			    RADEON_CG_BLOCK_HDP), true);
+	if (rdev->has_uvd) {
+		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
 		si_init_uvd_internal_cg(rdev);
 	}
 }
 
 static void si_fini_cg(struct radeon_device *rdev)
 {
-	bool has_uvd = true;
+	if (rdev->has_uvd) {
+		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
+	}
+	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			    RADEON_CG_BLOCK_MC |
+			    RADEON_CG_BLOCK_SDMA |
+			    RADEON_CG_BLOCK_BIF |
+			    RADEON_CG_BLOCK_HDP), false);
+}
 
-	if (has_uvd)
-		si_enable_uvd_mgcg(rdev, false);
-	si_enable_cgcg(rdev, false);
-	si_enable_mgcg(rdev, false);
+u32 si_get_csb_size(struct radeon_device *rdev)
+{
+	u32 count = 0;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	if (rdev->rlc.cs_data == NULL)
+		return 0;
+
+	/* begin clear state */
+	count += 2;
+	/* context control state */
+	count += 3;
+
+	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+		for (ext = sect->section; ext->extent != NULL; ++ext) {
+			if (sect->id == SECT_CONTEXT)
+				count += 2 + ext->reg_count;
+			else
+				return 0;
+		}
+	}
+	/* pa_sc_raster_config */
+	count += 3;
+	/* end clear state */
+	count += 2;
+	/* clear state */
+	count += 2;
+
+	return count;
+}
+
+void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
+{
+	u32 count = 0, i;
+	const struct cs_section_def *sect = NULL;
+	const struct cs_extent_def *ext = NULL;
+
+	if (rdev->rlc.cs_data == NULL)
+		return;
+	if (buffer == NULL)
+		return;
+
+	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
+
+	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
+	buffer[count++] = 0x80000000;
+	buffer[count++] = 0x80000000;
+
+	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
+		for (ext = sect->section; ext->extent != NULL; ++ext) {
+			if (sect->id == SECT_CONTEXT) {
+				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
+				buffer[count++] = ext->reg_index - 0xa000;
+				for (i = 0; i < ext->reg_count; i++)
+					buffer[count++] = ext->extent[i];
+			} else {
+				return;
+			}
+		}
+	}
+
+	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
+	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
+	switch (rdev->family) {
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+		buffer[count++] = 0x2a00126a;
+		break;
+	case CHIP_VERDE:
+		buffer[count++] = 0x0000124a;
+		break;
+	case CHIP_OLAND:
+		buffer[count++] = 0x00000082;
+		break;
+	case CHIP_HAINAN:
+		buffer[count++] = 0x00000000;
+		break;
+	default:
+		buffer[count++] = 0x00000000;
+		break;
+	}
+
+	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
+	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
+
+	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
+	buffer[count++] = 0;
 }
 
 static void si_init_pg(struct radeon_device *rdev)
 {
-	bool has_pg = false;
-
-	/* only cape verde supports PG */
-	if (rdev->family == CHIP_VERDE)
-		has_pg = true;
-
-	if (has_pg) {
+	if (rdev->pg_flags) {
+		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
+			si_init_dma_pg(rdev);
+		}
 		si_init_ao_cu_mask(rdev);
-		si_init_dma_pg(rdev);
+		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
+			si_init_gfx_cgpg(rdev);
+		}
 		si_enable_dma_pg(rdev, true);
-		si_init_gfx_cgpg(rdev);
 		si_enable_gfx_cgpg(rdev, true);
 	} else {
 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
@@ -5260,13 +5421,7 @@
 
 static void si_fini_pg(struct radeon_device *rdev)
 {
-	bool has_pg = false;
-
-	/* only cape verde supports PG */
-	if (rdev->family == CHIP_VERDE)
-		has_pg = true;
-
-	if (has_pg) {
+	if (rdev->pg_flags) {
 		si_enable_dma_pg(rdev, false);
 		si_enable_gfx_cgpg(rdev, false);
 	}
@@ -5275,167 +5430,7 @@
 /*
  * RLC
  */
-void si_rlc_fini(struct radeon_device *rdev)
-{
-	int r;
-
-	/* save restore block */
-	if (rdev->rlc.save_restore_obj) {
-		r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
-		if (unlikely(r != 0))
-			dev_warn(rdev->dev, "(%d) reserve RLC sr bo failed\n", r);
-		radeon_bo_unpin(rdev->rlc.save_restore_obj);
-		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
-
-		radeon_bo_unref(&rdev->rlc.save_restore_obj);
-		rdev->rlc.save_restore_obj = NULL;
-	}
-
-	/* clear state block */
-	if (rdev->rlc.clear_state_obj) {
-		r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
-		if (unlikely(r != 0))
-			dev_warn(rdev->dev, "(%d) reserve RLC c bo failed\n", r);
-		radeon_bo_unpin(rdev->rlc.clear_state_obj);
-		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
-
-		radeon_bo_unref(&rdev->rlc.clear_state_obj);
-		rdev->rlc.clear_state_obj = NULL;
-	}
-}
-
-#define RLC_CLEAR_STATE_END_MARKER          0x00000001
-
-int si_rlc_init(struct radeon_device *rdev)
-{
-	volatile u32 *dst_ptr;
-	u32 dws, data, i, j, k, reg_num;
-	u32 reg_list_num, reg_list_hdr_blk_index, reg_list_blk_index;
-	u64 reg_list_mc_addr;
-	const struct cs_section_def *cs_data = si_cs_data;
-	int r;
-
-	/* save restore block */
-	if (rdev->rlc.save_restore_obj == NULL) {
-		r = radeon_bo_create(rdev, RADEON_GPU_PAGE_SIZE, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_VRAM, NULL,
-				     &rdev->rlc.save_restore_obj);
-		if (r) {
-			dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r);
-			return r;
-		}
-	}
-
-	r = radeon_bo_reserve(rdev->rlc.save_restore_obj, false);
-	if (unlikely(r != 0)) {
-		si_rlc_fini(rdev);
-		return r;
-	}
-	r = radeon_bo_pin(rdev->rlc.save_restore_obj, RADEON_GEM_DOMAIN_VRAM,
-			  &rdev->rlc.save_restore_gpu_addr);
-	if (r) {
-		radeon_bo_unreserve(rdev->rlc.save_restore_obj);
-		dev_warn(rdev->dev, "(%d) pin RLC sr bo failed\n", r);
-		si_rlc_fini(rdev);
-		return r;
-	}
-
-	if (rdev->family == CHIP_VERDE) {
-		r = radeon_bo_kmap(rdev->rlc.save_restore_obj, (void **)&rdev->rlc.sr_ptr);
-		if (r) {
-			dev_warn(rdev->dev, "(%d) map RLC sr bo failed\n", r);
-			si_rlc_fini(rdev);
-		return r;
-		}
-		/* write the sr buffer */
-		dst_ptr = rdev->rlc.sr_ptr;
-		for (i = 0; i < ARRAY_SIZE(verde_rlc_save_restore_register_list); i++) {
-			dst_ptr[i] = verde_rlc_save_restore_register_list[i];
-		}
-		radeon_bo_kunmap(rdev->rlc.save_restore_obj);
-	}
-	radeon_bo_unreserve(rdev->rlc.save_restore_obj);
-
-	/* clear state block */
-	reg_list_num = 0;
-	dws = 0;
-	for (i = 0; cs_data[i].section != NULL; i++) {
-		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
-			reg_list_num++;
-			dws += cs_data[i].section[j].reg_count;
-		}
-	}
-	reg_list_blk_index = (3 * reg_list_num + 2);
-	dws += reg_list_blk_index;
-
-	if (rdev->rlc.clear_state_obj == NULL) {
-		r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-				     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj);
-		if (r) {
-			dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r);
-			si_rlc_fini(rdev);
-			return r;
-		}
-	}
-	r = radeon_bo_reserve(rdev->rlc.clear_state_obj, false);
-	if (unlikely(r != 0)) {
-		si_rlc_fini(rdev);
-		return r;
-	}
-	r = radeon_bo_pin(rdev->rlc.clear_state_obj, RADEON_GEM_DOMAIN_VRAM,
-			  &rdev->rlc.clear_state_gpu_addr);
-	if (r) {
-
-		radeon_bo_unreserve(rdev->rlc.clear_state_obj);
-		dev_warn(rdev->dev, "(%d) pin RLC c bo failed\n", r);
-		si_rlc_fini(rdev);
-		return r;
-	}
-	r = radeon_bo_kmap(rdev->rlc.clear_state_obj, (void **)&rdev->rlc.cs_ptr);
-	if (r) {
-		dev_warn(rdev->dev, "(%d) map RLC c bo failed\n", r);
-		si_rlc_fini(rdev);
-		return r;
-	}
-	/* set up the cs buffer */
-	dst_ptr = rdev->rlc.cs_ptr;
-	reg_list_hdr_blk_index = 0;
-	reg_list_mc_addr = rdev->rlc.clear_state_gpu_addr + (reg_list_blk_index * 4);
-	data = upper_32_bits(reg_list_mc_addr);
-	dst_ptr[reg_list_hdr_blk_index] = data;
-	reg_list_hdr_blk_index++;
-	for (i = 0; cs_data[i].section != NULL; i++) {
-		for (j = 0; cs_data[i].section[j].extent != NULL; j++) {
-			reg_num = cs_data[i].section[j].reg_count;
-			data = reg_list_mc_addr & 0xffffffff;
-			dst_ptr[reg_list_hdr_blk_index] = data;
-			reg_list_hdr_blk_index++;
-
-			data = (cs_data[i].section[j].reg_index * 4) & 0xffffffff;
-			dst_ptr[reg_list_hdr_blk_index] = data;
-			reg_list_hdr_blk_index++;
-
-			data = 0x08000000 | (reg_num * 4);
-			dst_ptr[reg_list_hdr_blk_index] = data;
-			reg_list_hdr_blk_index++;
-
-			for (k = 0; k < reg_num; k++) {
-				data = cs_data[i].section[j].extent[k];
-				dst_ptr[reg_list_blk_index + k] = data;
-			}
-			reg_list_mc_addr += reg_num * 4;
-			reg_list_blk_index += reg_num;
-		}
-	}
-	dst_ptr[reg_list_hdr_blk_index] = RLC_CLEAR_STATE_END_MARKER;
-
-	radeon_bo_kunmap(rdev->rlc.clear_state_obj);
-	radeon_bo_unreserve(rdev->rlc.clear_state_obj);
-
-	return 0;
-}
-
-static void si_rlc_reset(struct radeon_device *rdev)
+void si_rlc_reset(struct radeon_device *rdev)
 {
 	u32 tmp = RREG32(GRBM_SOFT_RESET);
 
@@ -5651,7 +5646,7 @@
 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
 
 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
-	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
+	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
 
 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
 		      IH_WPTR_OVERFLOW_CLEAR |
@@ -6335,80 +6330,6 @@
 	return IRQ_HANDLED;
 }
 
-/**
- * si_copy_dma - copy pages using the DMA engine
- *
- * @rdev: radeon_device pointer
- * @src_offset: src GPU address
- * @dst_offset: dst GPU address
- * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
- *
- * Copy GPU paging using the DMA engine (SI).
- * Used by the radeon ttm implementation to move pages if
- * registered as the asic copy callback.
- */
-int si_copy_dma(struct radeon_device *rdev,
-		uint64_t src_offset, uint64_t dst_offset,
-		unsigned num_gpu_pages,
-		struct radeon_fence **fence)
-{
-	struct radeon_semaphore *sem = NULL;
-	int ring_index = rdev->asic->copy.dma_ring_index;
-	struct radeon_ring *ring = &rdev->ring[ring_index];
-	u32 size_in_bytes, cur_size_in_bytes;
-	int i, num_loops;
-	int r = 0;
-
-	r = radeon_semaphore_create(rdev, &sem);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
-	}
-
-	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
-	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
-	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
-	if (r) {
-		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
-	}
-
-	if (radeon_fence_need_sync(*fence, ring->idx)) {
-		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
-					    ring->idx);
-		radeon_fence_note_sync(*fence, ring->idx);
-	} else {
-		radeon_semaphore_free(rdev, &sem, NULL);
-	}
-
-	for (i = 0; i < num_loops; i++) {
-		cur_size_in_bytes = size_in_bytes;
-		if (cur_size_in_bytes > 0xFFFFF)
-			cur_size_in_bytes = 0xFFFFF;
-		size_in_bytes -= cur_size_in_bytes;
-		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
-		radeon_ring_write(ring, dst_offset & 0xffffffff);
-		radeon_ring_write(ring, src_offset & 0xffffffff);
-		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
-		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
-		src_offset += cur_size_in_bytes;
-		dst_offset += cur_size_in_bytes;
-	}
-
-	r = radeon_fence_emit(rdev, fence, ring->idx);
-	if (r) {
-		radeon_ring_unlock_undo(rdev, ring);
-		return r;
-	}
-
-	radeon_ring_unlock_commit(rdev, ring);
-	radeon_semaphore_free(rdev, &sem, *fence);
-
-	return r;
-}
-
 /*
  * startup/shutdown callbacks
  */
@@ -6422,6 +6343,13 @@
 	/* enable aspm */
 	si_program_aspm(rdev);
 
+	/* scratch needs to be initialized before MC */
+	r = r600_vram_scratch_init(rdev);
+	if (r)
+		return r;
+
+	si_mc_program(rdev);
+
 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
 	    !rdev->rlc_fw || !rdev->mc_fw) {
 		r = si_init_microcode(rdev);
@@ -6437,18 +6365,19 @@
 		return r;
 	}
 
-	r = r600_vram_scratch_init(rdev);
-	if (r)
-		return r;
-
-	si_mc_program(rdev);
 	r = si_pcie_gart_enable(rdev);
 	if (r)
 		return r;
 	si_gpu_init(rdev);
 
 	/* allocate rlc buffers */
-	r = si_rlc_init(rdev);
+	if (rdev->family == CHIP_VERDE) {
+		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
+		rdev->rlc.reg_list_size =
+			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
+	}
+	rdev->rlc.cs_data = si_cs_data;
+	r = sumo_rlc_init(rdev);
 	if (r) {
 		DRM_ERROR("Failed to init rlc BOs!\n");
 		return r;
@@ -6490,7 +6419,7 @@
 	}
 
 	if (rdev->has_uvd) {
-		r = rv770_uvd_resume(rdev);
+		r = uvd_v2_2_resume(rdev);
 		if (!r) {
 			r = radeon_fence_driver_start_ring(rdev,
 							   R600_RING_TYPE_UVD_INDEX);
@@ -6519,21 +6448,21 @@
 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     CP_RB0_RPTR, CP_RB0_WPTR,
-			     0, 0xfffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r)
 		return r;
 
 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
 			     CP_RB1_RPTR, CP_RB1_WPTR,
-			     0, 0xfffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r)
 		return r;
 
 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
 			     CP_RB2_RPTR, CP_RB2_WPTR,
-			     0, 0xfffff, RADEON_CP_PACKET2);
+			     RADEON_CP_PACKET2);
 	if (r)
 		return r;
 
@@ -6541,7 +6470,7 @@
 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
 			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
 			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
 	if (r)
 		return r;
 
@@ -6549,7 +6478,7 @@
 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
 			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
 			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
-			     2, 0x3fffc, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
+			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
 	if (r)
 		return r;
 
@@ -6567,12 +6496,11 @@
 	if (rdev->has_uvd) {
 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
 		if (ring->ring_size) {
-			r = radeon_ring_init(rdev, ring, ring->ring_size,
-					     R600_WB_UVD_RPTR_OFFSET,
+			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
 					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
-					     0, 0xfffff, RADEON_CP_PACKET2);
+					     RADEON_CP_PACKET2);
 			if (!r)
-				r = r600_uvd_init(rdev);
+				r = uvd_v1_0_init(rdev);
 			if (r)
 				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
 		}
@@ -6590,6 +6518,10 @@
 		return r;
 	}
 
+	r = dce6_audio_init(rdev);
+	if (r)
+		return r;
+
 	return 0;
 }
 
@@ -6621,13 +6553,16 @@
 
 int si_suspend(struct radeon_device *rdev)
 {
+	dce6_audio_fini(rdev);
 	radeon_vm_manager_fini(rdev);
 	si_cp_enable(rdev, false);
 	cayman_dma_stop(rdev);
 	if (rdev->has_uvd) {
-		r600_uvd_rbc_stop(rdev);
+		uvd_v1_0_fini(rdev);
 		radeon_uvd_suspend(rdev);
 	}
+	si_fini_pg(rdev);
+	si_fini_cg(rdev);
 	si_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	si_pcie_gart_disable(rdev);
@@ -6734,7 +6669,7 @@
 		si_cp_fini(rdev);
 		cayman_dma_fini(rdev);
 		si_irq_fini(rdev);
-		si_rlc_fini(rdev);
+		sumo_rlc_fini(rdev);
 		radeon_wb_fini(rdev);
 		radeon_ib_pool_fini(rdev);
 		radeon_vm_manager_fini(rdev);
@@ -6759,16 +6694,18 @@
 {
 	si_cp_fini(rdev);
 	cayman_dma_fini(rdev);
-	si_irq_fini(rdev);
-	si_rlc_fini(rdev);
-	si_fini_cg(rdev);
 	si_fini_pg(rdev);
+	si_fini_cg(rdev);
+	si_irq_fini(rdev);
+	sumo_rlc_fini(rdev);
 	radeon_wb_fini(rdev);
 	radeon_vm_manager_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
-	if (rdev->has_uvd)
+	if (rdev->has_uvd) {
+		uvd_v1_0_fini(rdev);
 		radeon_uvd_fini(rdev);
+	}
 	si_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
new file mode 100644
index 0000000..49909d2
--- /dev/null
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Alex Deucher
+ */
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "sid.h"
+
+u32 si_gpu_check_soft_reset(struct radeon_device *rdev);
+
+/**
+ * si_dma_is_lockup - Check if the DMA engine is locked up
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring structure holding ring information
+ *
+ * Check if the async DMA engine is locked up.
+ * Returns true if the engine appears to be locked up, false if not.
+ */
+bool si_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	u32 reset_mask = si_gpu_check_soft_reset(rdev);
+	u32 mask;
+
+	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
+		mask = RADEON_RESET_DMA;
+	else
+		mask = RADEON_RESET_DMA1;
+
+	if (!(reset_mask & mask)) {
+		radeon_ring_lockup_update(ring);
+		return false;
+	}
+	/* force ring activities */
+	radeon_ring_force_activity(rdev, ring);
+	return radeon_ring_test_lockup(rdev, ring);
+}
+
+/**
+ * si_dma_vm_set_page - update the page tables using the DMA
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to fill with commands
+ * @pe: addr of the page entry
+ * @addr: dst addr to write into pe
+ * @count: number of page entries to update
+ * @incr: increase next addr by incr bytes
+ * @flags: access flags
+ *
+ * Update the page tables using the DMA (SI).
+ */
+void si_dma_vm_set_page(struct radeon_device *rdev,
+			struct radeon_ib *ib,
+			uint64_t pe,
+			uint64_t addr, unsigned count,
+			uint32_t incr, uint32_t flags)
+{
+	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
+	uint64_t value;
+	unsigned ndw;
+
+	if (flags & RADEON_VM_PAGE_SYSTEM) {
+		while (count) {
+			ndw = count * 2;
+			if (ndw > 0xFFFFE)
+				ndw = 0xFFFFE;
+
+			/* for non-physically contiguous pages (system) */
+			ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, ndw);
+			ib->ptr[ib->length_dw++] = pe;
+			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+			for (; ndw > 0; ndw -= 2, --count, pe += 8) {
+				if (flags & RADEON_VM_PAGE_SYSTEM) {
+					value = radeon_vm_map_gart(rdev, addr);
+					value &= 0xFFFFFFFFFFFFF000ULL;
+				} else if (flags & RADEON_VM_PAGE_VALID) {
+					value = addr;
+				} else {
+					value = 0;
+				}
+				addr += incr;
+				value |= r600_flags;
+				ib->ptr[ib->length_dw++] = value;
+				ib->ptr[ib->length_dw++] = upper_32_bits(value);
+			}
+		}
+	} else {
+		while (count) {
+			ndw = count * 2;
+			if (ndw > 0xFFFFE)
+				ndw = 0xFFFFE;
+
+			if (flags & RADEON_VM_PAGE_VALID)
+				value = addr;
+			else
+				value = 0;
+			/* for physically contiguous pages (vram) */
+			ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw);
+			ib->ptr[ib->length_dw++] = pe; /* dst addr */
+			ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff;
+			ib->ptr[ib->length_dw++] = r600_flags; /* mask */
+			ib->ptr[ib->length_dw++] = 0;
+			ib->ptr[ib->length_dw++] = value; /* value */
+			ib->ptr[ib->length_dw++] = upper_32_bits(value);
+			ib->ptr[ib->length_dw++] = incr; /* increment size */
+			ib->ptr[ib->length_dw++] = 0;
+			pe += ndw * 4;
+			addr += (ndw / 2) * incr;
+			count -= ndw / 2;
+		}
+	}
+	while (ib->length_dw & 0x7)
+		ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0);
+}
+
+void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
+{
+	struct radeon_ring *ring = &rdev->ring[ridx];
+
+	if (vm == NULL)
+		return;
+
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+	if (vm->id < 8) {
+		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2));
+	} else {
+		radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2));
+	}
+	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
+
+	/* flush hdp cache */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2));
+	radeon_ring_write(ring, 1);
+
+	/* bits 0-7 are the VM contexts0-7 */
+	radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+	radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2));
+	radeon_ring_write(ring, 1 << vm->id);
+}
+
+/**
+ * si_copy_dma - copy pages using the DMA engine
+ *
+ * @rdev: radeon_device pointer
+ * @src_offset: src GPU address
+ * @dst_offset: dst GPU address
+ * @num_gpu_pages: number of GPU pages to xfer
+ * @fence: radeon fence object
+ *
+ * Copy GPU paging using the DMA engine (SI).
+ * Used by the radeon ttm implementation to move pages if
+ * registered as the asic copy callback.
+ */
+int si_copy_dma(struct radeon_device *rdev,
+		uint64_t src_offset, uint64_t dst_offset,
+		unsigned num_gpu_pages,
+		struct radeon_fence **fence)
+{
+	struct radeon_semaphore *sem = NULL;
+	int ring_index = rdev->asic->copy.dma_ring_index;
+	struct radeon_ring *ring = &rdev->ring[ring_index];
+	u32 size_in_bytes, cur_size_in_bytes;
+	int i, num_loops;
+	int r = 0;
+
+	r = radeon_semaphore_create(rdev, &sem);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		return r;
+	}
+
+	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
+	num_loops = DIV_ROUND_UP(size_in_bytes, 0xfffff);
+	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 11);
+	if (r) {
+		DRM_ERROR("radeon: moving bo (%d).\n", r);
+		radeon_semaphore_free(rdev, &sem, NULL);
+		return r;
+	}
+
+	if (radeon_fence_need_sync(*fence, ring->idx)) {
+		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
+					    ring->idx);
+		radeon_fence_note_sync(*fence, ring->idx);
+	} else {
+		radeon_semaphore_free(rdev, &sem, NULL);
+	}
+
+	for (i = 0; i < num_loops; i++) {
+		cur_size_in_bytes = size_in_bytes;
+		if (cur_size_in_bytes > 0xFFFFF)
+			cur_size_in_bytes = 0xFFFFF;
+		size_in_bytes -= cur_size_in_bytes;
+		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 1, 0, 0, cur_size_in_bytes));
+		radeon_ring_write(ring, dst_offset & 0xffffffff);
+		radeon_ring_write(ring, src_offset & 0xffffffff);
+		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
+		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		src_offset += cur_size_in_bytes;
+		dst_offset += cur_size_in_bytes;
+	}
+
+	r = radeon_fence_emit(rdev, fence, ring->idx);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return r;
+	}
+
+	radeon_ring_unlock_commit(rdev, ring);
+	radeon_semaphore_free(rdev, &sem, *fence);
+
+	return r;
+}
+
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 73aaa2e..5be9b4e 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -37,8 +37,6 @@
 
 #define SMC_RAM_END                 0x20000
 
-#define DDR3_DRAM_ROWS              0x2000
-
 #define SCLK_MIN_DEEPSLEEP_FREQ     1350
 
 static const struct si_cac_config_reg cac_weights_tahiti[] =
@@ -1755,6 +1753,9 @@
 				    u32 engine_clock,
 				    SISLANDS_SMC_SCLK_VALUE *sclk);
 
+extern void si_update_cg(struct radeon_device *rdev,
+			 u32 block, bool enable);
+
 static struct si_power_info *si_get_pi(struct radeon_device *rdev)
 {
         struct si_power_info *pi = rdev->pm.dpm.priv;
@@ -1767,8 +1768,9 @@
 {
 	s64 kt, kv, leakage_w, i_leakage, vddc;
 	s64 temperature, t_slope, t_intercept, av, bv, t_ref;
+	s64 tmp;
 
-	i_leakage = drm_int2fixp(ileakage / 100);
+	i_leakage = div64_s64(drm_int2fixp(ileakage), 100);
 	vddc = div64_s64(drm_int2fixp(v), 1000);
 	temperature = div64_s64(drm_int2fixp(t), 1000);
 
@@ -1778,8 +1780,9 @@
 	bv = div64_s64(drm_int2fixp(coeff->bv), 100000000);
 	t_ref = drm_int2fixp(coeff->t_ref);
 
-	kt = drm_fixp_div(drm_fixp_exp(drm_fixp_mul(drm_fixp_mul(t_slope, vddc) + t_intercept, temperature)),
-			  drm_fixp_exp(drm_fixp_mul(drm_fixp_mul(t_slope, vddc) + t_intercept, t_ref)));
+	tmp = drm_fixp_mul(t_slope, vddc) + t_intercept;
+	kt = drm_fixp_exp(drm_fixp_mul(tmp, temperature));
+	kt = drm_fixp_div(kt, drm_fixp_exp(drm_fixp_mul(tmp, t_ref)));
 	kv = drm_fixp_mul(av, drm_fixp_exp(drm_fixp_mul(bv, vddc)));
 
 	leakage_w = drm_fixp_mul(drm_fixp_mul(drm_fixp_mul(i_leakage, kt), kv), vddc);
@@ -1931,6 +1934,7 @@
 			si_pi->cac_override = cac_override_pitcairn;
 			si_pi->powertune_data = &powertune_data_pitcairn;
 			si_pi->dte_data = dte_data_pitcairn;
+			break;
 		}
 	} else if (rdev->family == CHIP_VERDE) {
 		si_pi->lcac_config = lcac_cape_verde;
@@ -1941,6 +1945,7 @@
 		case 0x683B:
 		case 0x683F:
 		case 0x6829:
+		case 0x6835:
 			si_pi->cac_weights = cac_weights_cape_verde_pro;
 			si_pi->dte_data = dte_data_cape_verde;
 			break;
@@ -2901,7 +2906,8 @@
 {
 	struct ni_ps *ps = ni_get_ps(rps);
 	struct radeon_clock_and_voltage_limits *max_limits;
-	bool disable_mclk_switching;
+	bool disable_mclk_switching = false;
+	bool disable_sclk_switching = false;
 	u32 mclk, sclk;
 	u16 vddc, vddci;
 	int i;
@@ -2909,8 +2915,11 @@
 	if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
 	    ni_dpm_vblank_too_short(rdev))
 		disable_mclk_switching = true;
-	else
-		disable_mclk_switching = false;
+
+	if (rps->vclk || rps->dclk) {
+		disable_mclk_switching = true;
+		disable_sclk_switching = true;
+	}
 
 	if (rdev->pm.dpm.ac_power)
 		max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
@@ -2938,27 +2947,43 @@
 
 	if (disable_mclk_switching) {
 		mclk  = ps->performance_levels[ps->performance_level_count - 1].mclk;
-		sclk = ps->performance_levels[0].sclk;
-		vddc = ps->performance_levels[0].vddc;
 		vddci = ps->performance_levels[ps->performance_level_count - 1].vddci;
 	} else {
-		sclk = ps->performance_levels[0].sclk;
 		mclk = ps->performance_levels[0].mclk;
-		vddc = ps->performance_levels[0].vddc;
 		vddci = ps->performance_levels[0].vddci;
 	}
 
+	if (disable_sclk_switching) {
+		sclk = ps->performance_levels[ps->performance_level_count - 1].sclk;
+		vddc = ps->performance_levels[ps->performance_level_count - 1].vddc;
+	} else {
+		sclk = ps->performance_levels[0].sclk;
+		vddc = ps->performance_levels[0].vddc;
+	}
+
 	/* adjusted low state */
 	ps->performance_levels[0].sclk = sclk;
 	ps->performance_levels[0].mclk = mclk;
 	ps->performance_levels[0].vddc = vddc;
 	ps->performance_levels[0].vddci = vddci;
 
-	for (i = 1; i < ps->performance_level_count; i++) {
-		if (ps->performance_levels[i].sclk < ps->performance_levels[i - 1].sclk)
-			ps->performance_levels[i].sclk = ps->performance_levels[i - 1].sclk;
-		if (ps->performance_levels[i].vddc < ps->performance_levels[i - 1].vddc)
-			ps->performance_levels[i].vddc = ps->performance_levels[i - 1].vddc;
+	if (disable_sclk_switching) {
+		sclk = ps->performance_levels[0].sclk;
+		for (i = 1; i < ps->performance_level_count; i++) {
+			if (sclk < ps->performance_levels[i].sclk)
+				sclk = ps->performance_levels[i].sclk;
+		}
+		for (i = 0; i < ps->performance_level_count; i++) {
+			ps->performance_levels[i].sclk = sclk;
+			ps->performance_levels[i].vddc = vddc;
+		}
+	} else {
+		for (i = 1; i < ps->performance_level_count; i++) {
+			if (ps->performance_levels[i].sclk < ps->performance_levels[i - 1].sclk)
+				ps->performance_levels[i].sclk = ps->performance_levels[i - 1].sclk;
+			if (ps->performance_levels[i].vddc < ps->performance_levels[i - 1].vddc)
+				ps->performance_levels[i].vddc = ps->performance_levels[i - 1].vddc;
+		}
 	}
 
 	if (disable_mclk_switching) {
@@ -3237,10 +3262,10 @@
 {
 	struct radeon_ps *rps = rdev->pm.dpm.current_ps;
 	struct ni_ps *ps = ni_get_ps(rps);
-	u32 levels;
+	u32 levels = ps->performance_level_count;
 
 	if (level == RADEON_DPM_FORCED_LEVEL_HIGH) {
-		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, 0) != PPSMC_Result_OK)
+		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, levels) != PPSMC_Result_OK)
 			return -EINVAL;
 
 		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetForcedLevels, 1) != PPSMC_Result_OK)
@@ -3249,14 +3274,13 @@
 		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetForcedLevels, 0) != PPSMC_Result_OK)
 			return -EINVAL;
 
-		levels = ps->performance_level_count - 1;
-		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, levels) != PPSMC_Result_OK)
+		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, 1) != PPSMC_Result_OK)
 			return -EINVAL;
 	} else if (level == RADEON_DPM_FORCED_LEVEL_AUTO) {
 		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetForcedLevels, 0) != PPSMC_Result_OK)
 			return -EINVAL;
 
-		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, 0) != PPSMC_Result_OK)
+		if (si_send_msg_to_smc_with_parameter(rdev, PPSMC_MSG_SetEnabledLevels, levels) != PPSMC_Result_OK)
 			return -EINVAL;
 	}
 
@@ -3620,8 +3644,12 @@
 {
 	u32 tmp = RREG32(CG_DISPLAY_GAP_CNTL);
 
+	tmp &= ~(DISP1_GAP_MASK | DISP2_GAP_MASK);
+	tmp |= (DISP1_GAP(R600_PM_DISPLAY_GAP_IGNORE) |
+		DISP2_GAP(R600_PM_DISPLAY_GAP_IGNORE));
+
 	tmp &= ~(DISP1_GAP_MCHG_MASK | DISP2_GAP_MCHG_MASK);
-	tmp |= (DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE) |
+	tmp |= (DISP1_GAP_MCHG(R600_PM_DISPLAY_GAP_VBLANK) |
 		DISP2_GAP_MCHG(R600_PM_DISPLAY_GAP_IGNORE));
 	WREG32(CG_DISPLAY_GAP_CNTL, tmp);
 }
@@ -3638,7 +3666,7 @@
 	WREG32(CG_FTV, 0);
 }
 
-static u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock)
+u8 si_get_ddr3_mclk_frequency_ratio(u32 memory_clock)
 {
 	u8 mc_para_index;
 
@@ -3651,7 +3679,7 @@
 	return mc_para_index;
 }
 
-static u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode)
+u8 si_get_mclk_frequency_ratio(u32 memory_clock, bool strobe_mode)
 {
 	u8 mc_para_index;
 
@@ -3733,20 +3761,21 @@
 	return true;
 }
 
-static void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev,
-						     struct atom_voltage_table *voltage_table)
+void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev,
+					      u32 max_voltage_steps,
+					      struct atom_voltage_table *voltage_table)
 {
 	unsigned int i, diff;
 
-	if (voltage_table->count <= SISLANDS_MAX_NO_VREG_STEPS)
+	if (voltage_table->count <= max_voltage_steps)
 		return;
 
-	diff = voltage_table->count - SISLANDS_MAX_NO_VREG_STEPS;
+	diff = voltage_table->count - max_voltage_steps;
 
-	for (i= 0; i < SISLANDS_MAX_NO_VREG_STEPS; i++)
+	for (i= 0; i < max_voltage_steps; i++)
 		voltage_table->entries[i] = voltage_table->entries[i + diff];
 
-	voltage_table->count = SISLANDS_MAX_NO_VREG_STEPS;
+	voltage_table->count = max_voltage_steps;
 }
 
 static int si_construct_voltage_tables(struct radeon_device *rdev)
@@ -3762,7 +3791,9 @@
 		return ret;
 
 	if (eg_pi->vddc_voltage_table.count > SISLANDS_MAX_NO_VREG_STEPS)
-		si_trim_voltage_table_to_fit_state_table(rdev, &eg_pi->vddc_voltage_table);
+		si_trim_voltage_table_to_fit_state_table(rdev,
+							 SISLANDS_MAX_NO_VREG_STEPS,
+							 &eg_pi->vddc_voltage_table);
 
 	if (eg_pi->vddci_control) {
 		ret = radeon_atom_get_voltage_table(rdev, VOLTAGE_TYPE_VDDCI,
@@ -3771,7 +3802,9 @@
 			return ret;
 
 		if (eg_pi->vddci_voltage_table.count > SISLANDS_MAX_NO_VREG_STEPS)
-			si_trim_voltage_table_to_fit_state_table(rdev, &eg_pi->vddci_voltage_table);
+			si_trim_voltage_table_to_fit_state_table(rdev,
+								 SISLANDS_MAX_NO_VREG_STEPS,
+								 &eg_pi->vddci_voltage_table);
 	}
 
 	if (pi->mvdd_control) {
@@ -3789,7 +3822,9 @@
 		}
 
 		if (si_pi->mvdd_voltage_table.count > SISLANDS_MAX_NO_VREG_STEPS)
-			si_trim_voltage_table_to_fit_state_table(rdev, &si_pi->mvdd_voltage_table);
+			si_trim_voltage_table_to_fit_state_table(rdev,
+								 SISLANDS_MAX_NO_VREG_STEPS,
+								 &si_pi->mvdd_voltage_table);
 	}
 
 	if (si_pi->vddc_phase_shed_control) {
@@ -4036,16 +4071,15 @@
 static u32 si_calculate_memory_refresh_rate(struct radeon_device *rdev,
 					    u32 engine_clock)
 {
-	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	u32 dram_rows;
 	u32 dram_refresh_rate;
 	u32 mc_arb_rfsh_rate;
 	u32 tmp = (RREG32(MC_ARB_RAMCFG) & NOOFROWS_MASK) >> NOOFROWS_SHIFT;
 
-	if (pi->mem_gddr5)
-		dram_rows = 1 << (tmp + 10);
+	if (tmp >= 4)
+		dram_rows = 16384;
 	else
-		dram_rows = DDR3_DRAM_ROWS;
+		dram_rows = 1 << (tmp + 10);
 
 	dram_refresh_rate = 1 << ((RREG32(MC_SEQ_MISC0) & 0x3) + 3);
 	mc_arb_rfsh_rate = ((engine_clock * 10) * dram_refresh_rate / dram_rows - 32) / 64;
@@ -5728,6 +5762,13 @@
 	struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps;
 	int ret;
 
+	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			    RADEON_CG_BLOCK_MC |
+			    RADEON_CG_BLOCK_SDMA |
+			    RADEON_CG_BLOCK_BIF |
+			    RADEON_CG_BLOCK_UVD |
+			    RADEON_CG_BLOCK_HDP), false);
+
 	if (si_is_smc_running(rdev))
 		return -EINVAL;
 	if (pi->voltage_control)
@@ -5847,6 +5888,13 @@
 
 	si_enable_auto_throttle_source(rdev, RADEON_DPM_AUTO_THROTTLE_SRC_THERMAL, true);
 
+	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			    RADEON_CG_BLOCK_MC |
+			    RADEON_CG_BLOCK_SDMA |
+			    RADEON_CG_BLOCK_BIF |
+			    RADEON_CG_BLOCK_UVD |
+			    RADEON_CG_BLOCK_HDP), true);
+
 	ni_update_current_ps(rdev, boot_ps);
 
 	return 0;
@@ -5857,6 +5905,13 @@
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct radeon_ps *boot_ps = rdev->pm.dpm.boot_ps;
 
+	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			    RADEON_CG_BLOCK_MC |
+			    RADEON_CG_BLOCK_SDMA |
+			    RADEON_CG_BLOCK_BIF |
+			    RADEON_CG_BLOCK_UVD |
+			    RADEON_CG_BLOCK_HDP), false);
+
 	if (!si_is_smc_running(rdev))
 		return;
 	si_disable_ulv(rdev);
@@ -5921,6 +5976,13 @@
 	struct radeon_ps *old_ps = &eg_pi->current_rps;
 	int ret;
 
+	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			    RADEON_CG_BLOCK_MC |
+			    RADEON_CG_BLOCK_SDMA |
+			    RADEON_CG_BLOCK_BIF |
+			    RADEON_CG_BLOCK_UVD |
+			    RADEON_CG_BLOCK_HDP), false);
+
 	ret = si_disable_ulv(rdev);
 	if (ret) {
 		DRM_ERROR("si_disable_ulv failed\n");
@@ -6013,16 +6075,18 @@
 		return ret;
 	}
 
-#if 0
-	/* XXX */
 	ret = si_dpm_force_performance_level(rdev, RADEON_DPM_FORCED_LEVEL_AUTO);
 	if (ret) {
 		DRM_ERROR("si_dpm_force_performance_level failed\n");
 		return ret;
 	}
-#else
-	rdev->pm.dpm.forced_level = RADEON_DPM_FORCED_LEVEL_AUTO;
-#endif
+
+	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
+			    RADEON_CG_BLOCK_MC |
+			    RADEON_CG_BLOCK_SDMA |
+			    RADEON_CG_BLOCK_BIF |
+			    RADEON_CG_BLOCK_UVD |
+			    RADEON_CG_BLOCK_HDP), true);
 
 	return 0;
 }
@@ -6213,6 +6277,7 @@
 	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
 	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
 	for (i = 0; i < state_array->ucNumEntries; i++) {
+		u8 *idx;
 		power_state = (union pplib_power_state *)power_state_offset;
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
@@ -6229,14 +6294,16 @@
 					      non_clock_info,
 					      non_clock_info_array->ucEntrySize);
 		k = 0;
+		idx = (u8 *)&power_state->v2.clockInfoIndex[0];
 		for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) {
-			clock_array_index = power_state->v2.clockInfoIndex[j];
+			clock_array_index = idx[j];
 			if (clock_array_index >= clock_info_array->ucNumEntries)
 				continue;
 			if (k >= SISLANDS_MAX_HARDWARE_POWERLEVELS)
 				break;
 			clock_info = (union pplib_clock_info *)
-				&clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+				((u8 *)&clock_info_array->clockInfo[0] +
+				 (clock_array_index * clock_info_array->ucEntrySize));
 			si_parse_pplib_clock_info(rdev,
 						  &rdev->pm.dpm.ps[i], k,
 						  clock_info);
@@ -6254,9 +6321,6 @@
 	struct evergreen_power_info *eg_pi;
 	struct ni_power_info *ni_pi;
 	struct si_power_info *si_pi;
-	int index = GetIndexIntoMasterTable(DATA, ASIC_InternalSS_Info);
-	u16 data_offset, size;
-	u8 frev, crev;
 	struct atom_clock_dividers dividers;
 	int ret;
 	u32 mask;
@@ -6347,16 +6411,7 @@
 	si_pi->vddc_phase_shed_control =
 		radeon_atom_is_voltage_gpio(rdev, SET_VOLTAGE_TYPE_ASIC_VDDC, VOLTAGE_OBJ_PHASE_LUT);
 
-	if (atom_parse_data_header(rdev->mode_info.atom_context, index, &size,
-                                   &frev, &crev, &data_offset)) {
-		pi->sclk_ss = true;
-		pi->mclk_ss = true;
-		pi->dynamic_ss = true;
-	} else {
-		pi->sclk_ss = false;
-		pi->mclk_ss = false;
-		pi->dynamic_ss = true;
-	}
+	rv770_get_engine_memory_ss(rdev);
 
 	pi->asi = RV770_ASI_DFLT;
 	pi->pasi = CYPRESS_HASI_DFLT;
@@ -6367,8 +6422,7 @@
 	eg_pi->sclk_deep_sleep = true;
 	si_pi->sclk_deep_sleep_above_low = false;
 
-	if (pi->gfx_clock_gating &&
-	    (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE))
+	if (rdev->pm.int_thermal_type != THERMAL_TYPE_NONE)
 		pi->thermal_protection = true;
 	else
 		pi->thermal_protection = false;
@@ -6395,6 +6449,12 @@
 
 	si_initialize_powertune_defaults(rdev);
 
+	/* make sure dc limits are valid */
+	if ((rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.sclk == 0) ||
+	    (rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc.mclk == 0))
+		rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc =
+			rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 2c8da27..52d2ab6 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -282,6 +282,10 @@
 
 #define DMIF_ADDR_CALC  				0xC00
 
+#define	PIPE0_DMIF_BUFFER_CONTROL			  0x0ca0
+#       define DMIF_BUFFERS_ALLOCATED(x)                  ((x) << 0)
+#       define DMIF_BUFFERS_ALLOCATED_COMPLETED           (1 << 4)
+
 #define	SRBM_STATUS				        0xE50
 #define		GRBM_RQ_PENDING 			(1 << 5)
 #define		VMC_BUSY 				(1 << 8)
@@ -581,6 +585,7 @@
 #define		CLKS_MASK				(0xfff << 0)
 
 #define	HDP_HOST_PATH_CNTL				0x2C00
+#define 	CLOCK_GATING_DIS			(1 << 23)
 #define	HDP_NONSURFACE_BASE				0x2C04
 #define	HDP_NONSURFACE_INFO				0x2C08
 #define	HDP_NONSURFACE_SIZE				0x2C0C
@@ -588,6 +593,8 @@
 #define HDP_ADDR_CONFIG  				0x2F48
 #define HDP_MISC_CNTL					0x2F4C
 #define 	HDP_FLUSH_INVALIDATE_CACHE			(1 << 0)
+#define HDP_MEM_POWER_LS				0x2F50
+#define 	HDP_LS_ENABLE				(1 << 0)
 
 #define ATC_MISC_CG           				0x3350
 
@@ -635,6 +642,54 @@
 
 #define HDP_REG_COHERENCY_FLUSH_CNTL			0x54A0
 
+/* DCE6 ELD audio interface */
+#define AZ_F0_CODEC_ENDPOINT_INDEX                       0x5E00
+#       define AZ_ENDPOINT_REG_INDEX(x)                  (((x) & 0xff) << 0)
+#       define AZ_ENDPOINT_REG_WRITE_EN                  (1 << 8)
+#define AZ_F0_CODEC_ENDPOINT_DATA                        0x5E04
+
+#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER          0x25
+#define		SPEAKER_ALLOCATION(x)			(((x) & 0x7f) << 0)
+#define		SPEAKER_ALLOCATION_MASK			(0x7f << 0)
+#define		SPEAKER_ALLOCATION_SHIFT		0
+#define		HDMI_CONNECTION				(1 << 16)
+#define		DP_CONNECTION				(1 << 17)
+
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0        0x28 /* LPCM */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1        0x29 /* AC3 */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2        0x2A /* MPEG1 */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3        0x2B /* MP3 */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4        0x2C /* MPEG2 */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5        0x2D /* AAC */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6        0x2E /* DTS */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7        0x2F /* ATRAC */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR8        0x30 /* one bit audio - leave at 0 (default) */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9        0x31 /* Dolby Digital */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10       0x32 /* DTS-HD */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11       0x33 /* MAT-MLP */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR12       0x34 /* DTS */
+#define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13       0x35 /* WMA Pro */
+#       define MAX_CHANNELS(x)                            (((x) & 0x7) << 0)
+/* max channels minus one.  7 = 8 channels */
+#       define SUPPORTED_FREQUENCIES(x)                   (((x) & 0xff) << 8)
+#       define DESCRIPTOR_BYTE_2(x)                       (((x) & 0xff) << 16)
+#       define SUPPORTED_FREQUENCIES_STEREO(x)            (((x) & 0xff) << 24) /* LPCM only */
+/* SUPPORTED_FREQUENCIES, SUPPORTED_FREQUENCIES_STEREO
+ * bit0 = 32 kHz
+ * bit1 = 44.1 kHz
+ * bit2 = 48 kHz
+ * bit3 = 88.2 kHz
+ * bit4 = 96 kHz
+ * bit5 = 176.4 kHz
+ * bit6 = 192 kHz
+ */
+#define AZ_F0_CODEC_PIN_CONTROL_HOTPLUG_CONTROL          0x54
+#       define AUDIO_ENABLED                             (1 << 31)
+
+#define AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT  0x56
+#define		PORT_CONNECTIVITY_MASK				(3 << 30)
+#define		PORT_CONNECTIVITY_SHIFT				30
+
 #define	DC_LB_MEMORY_SPLIT					0x6b0c
 #define		DC_LB_MEMORY_CONFIG(x)				((x) << 20)
 
@@ -755,6 +810,17 @@
 /* 0x6e98, 0x7a98, 0x10698, 0x11298, 0x11e98, 0x12a98 */
 #define CRTC_STATUS_FRAME_COUNT                         0x6e98
 
+#define AFMT_AUDIO_SRC_CONTROL                          0x713c
+#define		AFMT_AUDIO_SRC_SELECT(x)		(((x) & 7) << 0)
+/* AFMT_AUDIO_SRC_SELECT
+ * 0 = stream0
+ * 1 = stream1
+ * 2 = stream2
+ * 3 = stream3
+ * 4 = stream4
+ * 5 = stream5
+ */
+
 #define	GRBM_CNTL					0x8000
 #define		GRBM_READ_TIMEOUT(x)				((x) << 0)
 
@@ -1295,6 +1361,7 @@
 /* PCIE registers idx/data 0x30/0x34 */
 #define PCIE_CNTL2                                        0x1c /* PCIE */
 #       define SLV_MEM_LS_EN                              (1 << 16)
+#       define SLV_MEM_AGGRESSIVE_LS_EN                   (1 << 17)
 #       define MST_MEM_LS_EN                              (1 << 18)
 #       define REPLAY_MEM_LS_EN                           (1 << 19)
 #define PCIE_LC_STATUS1                                   0x28 /* PCIE */
@@ -1644,6 +1711,10 @@
 #       define DMA_IDLE                                   (1 << 0)
 #define DMA_TILING_CONFIG  				  0xd0b8
 
+#define	DMA_POWER_CNTL					0xd0bc
+#       define MEM_POWER_OVERRIDE                       (1 << 8)
+#define	DMA_CLK_CTRL					0xd0c0
+
 #define	DMA_PG						0xd0d4
 #	define PG_CNTL_ENABLE				(1 << 0)
 #define	DMA_PGFSM_CONFIG				0xd0d8
diff --git a/drivers/gpu/drm/radeon/smu7.h b/drivers/gpu/drm/radeon/smu7.h
new file mode 100644
index 0000000..75a380a
--- /dev/null
+++ b/drivers/gpu/drm/radeon/smu7.h
@@ -0,0 +1,170 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef SMU7_H
+#define SMU7_H
+
+#pragma pack(push, 1)
+
+#define SMU7_CONTEXT_ID_SMC        1
+#define SMU7_CONTEXT_ID_VBIOS      2
+
+
+#define SMU7_CONTEXT_ID_SMC        1
+#define SMU7_CONTEXT_ID_VBIOS      2
+
+#define SMU7_MAX_LEVELS_VDDC            8
+#define SMU7_MAX_LEVELS_VDDCI           4
+#define SMU7_MAX_LEVELS_MVDD            4
+#define SMU7_MAX_LEVELS_VDDNB           8
+
+#define SMU7_MAX_LEVELS_GRAPHICS        SMU__NUM_SCLK_DPM_STATE   // SCLK + SQ DPM + ULV
+#define SMU7_MAX_LEVELS_MEMORY          SMU__NUM_MCLK_DPM_LEVELS   // MCLK Levels DPM
+#define SMU7_MAX_LEVELS_GIO             SMU__NUM_LCLK_DPM_LEVELS  // LCLK Levels
+#define SMU7_MAX_LEVELS_LINK            SMU__NUM_PCIE_DPM_LEVELS  // PCIe speed and number of lanes.
+#define SMU7_MAX_LEVELS_UVD             8   // VCLK/DCLK levels for UVD.
+#define SMU7_MAX_LEVELS_VCE             8   // ECLK levels for VCE.
+#define SMU7_MAX_LEVELS_ACP             8   // ACLK levels for ACP.
+#define SMU7_MAX_LEVELS_SAMU            8   // SAMCLK levels for SAMU.
+#define SMU7_MAX_ENTRIES_SMIO           32  // Number of entries in SMIO table.
+
+#define DPM_NO_LIMIT 0
+#define DPM_NO_UP 1
+#define DPM_GO_DOWN 2
+#define DPM_GO_UP 3
+
+#define SMU7_FIRST_DPM_GRAPHICS_LEVEL    0
+#define SMU7_FIRST_DPM_MEMORY_LEVEL      0
+
+#define GPIO_CLAMP_MODE_VRHOT      1
+#define GPIO_CLAMP_MODE_THERM      2
+#define GPIO_CLAMP_MODE_DC         4
+
+#define SCRATCH_B_TARG_PCIE_INDEX_SHIFT 0
+#define SCRATCH_B_TARG_PCIE_INDEX_MASK  (0x7<<SCRATCH_B_TARG_PCIE_INDEX_SHIFT)
+#define SCRATCH_B_CURR_PCIE_INDEX_SHIFT 3
+#define SCRATCH_B_CURR_PCIE_INDEX_MASK  (0x7<<SCRATCH_B_CURR_PCIE_INDEX_SHIFT)
+#define SCRATCH_B_TARG_UVD_INDEX_SHIFT  6
+#define SCRATCH_B_TARG_UVD_INDEX_MASK   (0x7<<SCRATCH_B_TARG_UVD_INDEX_SHIFT)
+#define SCRATCH_B_CURR_UVD_INDEX_SHIFT  9
+#define SCRATCH_B_CURR_UVD_INDEX_MASK   (0x7<<SCRATCH_B_CURR_UVD_INDEX_SHIFT)
+#define SCRATCH_B_TARG_VCE_INDEX_SHIFT  12
+#define SCRATCH_B_TARG_VCE_INDEX_MASK   (0x7<<SCRATCH_B_TARG_VCE_INDEX_SHIFT)
+#define SCRATCH_B_CURR_VCE_INDEX_SHIFT  15
+#define SCRATCH_B_CURR_VCE_INDEX_MASK   (0x7<<SCRATCH_B_CURR_VCE_INDEX_SHIFT)
+#define SCRATCH_B_TARG_ACP_INDEX_SHIFT  18
+#define SCRATCH_B_TARG_ACP_INDEX_MASK   (0x7<<SCRATCH_B_TARG_ACP_INDEX_SHIFT)
+#define SCRATCH_B_CURR_ACP_INDEX_SHIFT  21
+#define SCRATCH_B_CURR_ACP_INDEX_MASK   (0x7<<SCRATCH_B_CURR_ACP_INDEX_SHIFT)
+#define SCRATCH_B_TARG_SAMU_INDEX_SHIFT 24
+#define SCRATCH_B_TARG_SAMU_INDEX_MASK  (0x7<<SCRATCH_B_TARG_SAMU_INDEX_SHIFT)
+#define SCRATCH_B_CURR_SAMU_INDEX_SHIFT 27
+#define SCRATCH_B_CURR_SAMU_INDEX_MASK  (0x7<<SCRATCH_B_CURR_SAMU_INDEX_SHIFT)
+
+
+struct SMU7_PIDController
+{
+    uint32_t Ki;
+    int32_t LFWindupUL;
+    int32_t LFWindupLL;
+    uint32_t StatePrecision;
+    uint32_t LfPrecision;
+    uint32_t LfOffset;
+    uint32_t MaxState;
+    uint32_t MaxLfFraction;
+    uint32_t StateShift;
+};
+
+typedef struct SMU7_PIDController SMU7_PIDController;
+
+// -------------------------------------------------------------------------------------------------------------------------
+#define SMU7_MAX_PCIE_LINK_SPEEDS 3 /* 0:Gen1 1:Gen2 2:Gen3 */
+
+#define SMU7_SCLK_DPM_CONFIG_MASK                        0x01
+#define SMU7_VOLTAGE_CONTROLLER_CONFIG_MASK              0x02
+#define SMU7_THERMAL_CONTROLLER_CONFIG_MASK              0x04
+#define SMU7_MCLK_DPM_CONFIG_MASK                        0x08
+#define SMU7_UVD_DPM_CONFIG_MASK                         0x10
+#define SMU7_VCE_DPM_CONFIG_MASK                         0x20
+#define SMU7_ACP_DPM_CONFIG_MASK                         0x40
+#define SMU7_SAMU_DPM_CONFIG_MASK                        0x80
+#define SMU7_PCIEGEN_DPM_CONFIG_MASK                    0x100
+
+#define SMU7_ACP_MCLK_HANDSHAKE_DISABLE                  0x00000001
+#define SMU7_ACP_SCLK_HANDSHAKE_DISABLE                  0x00000002
+#define SMU7_UVD_MCLK_HANDSHAKE_DISABLE                  0x00000100
+#define SMU7_UVD_SCLK_HANDSHAKE_DISABLE                  0x00000200
+#define SMU7_VCE_MCLK_HANDSHAKE_DISABLE                  0x00010000
+#define SMU7_VCE_SCLK_HANDSHAKE_DISABLE                  0x00020000
+
+struct SMU7_Firmware_Header
+{
+    uint32_t Digest[5];
+    uint32_t Version;
+    uint32_t HeaderSize;
+    uint32_t Flags;
+    uint32_t EntryPoint;
+    uint32_t CodeSize;
+    uint32_t ImageSize;
+
+    uint32_t Rtos;
+    uint32_t SoftRegisters;
+    uint32_t DpmTable;
+    uint32_t FanTable;
+    uint32_t CacConfigTable;
+    uint32_t CacStatusTable;
+
+    uint32_t mcRegisterTable;
+
+    uint32_t mcArbDramTimingTable;
+
+    uint32_t PmFuseTable;
+    uint32_t Globals;
+    uint32_t Reserved[42];
+    uint32_t Signature;
+};
+
+typedef struct SMU7_Firmware_Header SMU7_Firmware_Header;
+
+#define SMU7_FIRMWARE_HEADER_LOCATION 0x20000
+
+enum  DisplayConfig {
+    PowerDown = 1,
+    DP54x4,
+    DP54x2,
+    DP54x1,
+    DP27x4,
+    DP27x2,
+    DP27x1,
+    HDMI297,
+    HDMI162,
+    LVDS,
+    DP324x4,
+    DP324x2,
+    DP324x1
+};
+
+#pragma pack(pop)
+
+#endif
+
diff --git a/drivers/gpu/drm/radeon/smu7_discrete.h b/drivers/gpu/drm/radeon/smu7_discrete.h
new file mode 100644
index 0000000..82f70c9
--- /dev/null
+++ b/drivers/gpu/drm/radeon/smu7_discrete.h
@@ -0,0 +1,486 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef SMU7_DISCRETE_H
+#define SMU7_DISCRETE_H
+
+#include "smu7.h"
+
+#pragma pack(push, 1)
+
+#define SMU7_DTE_ITERATIONS 5
+#define SMU7_DTE_SOURCES 3
+#define SMU7_DTE_SINKS 1
+#define SMU7_NUM_CPU_TES 0
+#define SMU7_NUM_GPU_TES 1
+#define SMU7_NUM_NON_TES 2
+
+struct SMU7_SoftRegisters
+{
+    uint32_t        RefClockFrequency;
+    uint32_t        PmTimerP;
+    uint32_t        FeatureEnables;
+    uint32_t        PreVBlankGap;
+    uint32_t        VBlankTimeout;
+    uint32_t        TrainTimeGap;
+
+    uint32_t        MvddSwitchTime;
+    uint32_t        LongestAcpiTrainTime;
+    uint32_t        AcpiDelay;
+    uint32_t        G5TrainTime;
+    uint32_t        DelayMpllPwron;
+    uint32_t        VoltageChangeTimeout;
+    uint32_t        HandshakeDisables;
+
+    uint8_t         DisplayPhy1Config;
+    uint8_t         DisplayPhy2Config;
+    uint8_t         DisplayPhy3Config;
+    uint8_t         DisplayPhy4Config;
+
+    uint8_t         DisplayPhy5Config;
+    uint8_t         DisplayPhy6Config;
+    uint8_t         DisplayPhy7Config;
+    uint8_t         DisplayPhy8Config;
+
+    uint32_t        AverageGraphicsA;
+    uint32_t        AverageMemoryA;
+    uint32_t        AverageGioA;
+
+    uint8_t         SClkDpmEnabledLevels;
+    uint8_t         MClkDpmEnabledLevels;
+    uint8_t         LClkDpmEnabledLevels;
+    uint8_t         PCIeDpmEnabledLevels;
+
+    uint8_t         UVDDpmEnabledLevels;
+    uint8_t         SAMUDpmEnabledLevels;
+    uint8_t         ACPDpmEnabledLevels;
+    uint8_t         VCEDpmEnabledLevels;
+
+    uint32_t        DRAM_LOG_ADDR_H;
+    uint32_t        DRAM_LOG_ADDR_L;
+    uint32_t        DRAM_LOG_PHY_ADDR_H;
+    uint32_t        DRAM_LOG_PHY_ADDR_L;
+    uint32_t        DRAM_LOG_BUFF_SIZE;
+    uint32_t        UlvEnterC;
+    uint32_t        UlvTime;
+    uint32_t        Reserved[3];
+
+};
+
+typedef struct SMU7_SoftRegisters SMU7_SoftRegisters;
+
+struct SMU7_Discrete_VoltageLevel
+{
+    uint16_t    Voltage;
+    uint16_t    StdVoltageHiSidd;
+    uint16_t    StdVoltageLoSidd;
+    uint8_t     Smio;
+    uint8_t     padding;
+};
+
+typedef struct SMU7_Discrete_VoltageLevel SMU7_Discrete_VoltageLevel;
+
+struct SMU7_Discrete_GraphicsLevel
+{
+    uint32_t    Flags;
+    uint32_t    MinVddc;
+    uint32_t    MinVddcPhases;
+
+    uint32_t    SclkFrequency;
+
+    uint8_t     padding1[2];
+    uint16_t    ActivityLevel;
+
+    uint32_t    CgSpllFuncCntl3;
+    uint32_t    CgSpllFuncCntl4;
+    uint32_t    SpllSpreadSpectrum;
+    uint32_t    SpllSpreadSpectrum2;
+    uint32_t    CcPwrDynRm;
+    uint32_t    CcPwrDynRm1;
+    uint8_t     SclkDid;
+    uint8_t     DisplayWatermark;
+    uint8_t     EnabledForActivity;
+    uint8_t     EnabledForThrottle;
+    uint8_t     UpH;
+    uint8_t     DownH;
+    uint8_t     VoltageDownH;
+    uint8_t     PowerThrottle;
+    uint8_t     DeepSleepDivId;
+    uint8_t     padding[3];
+};
+
+typedef struct SMU7_Discrete_GraphicsLevel SMU7_Discrete_GraphicsLevel;
+
+struct SMU7_Discrete_ACPILevel
+{
+    uint32_t    Flags;
+    uint32_t    MinVddc;
+    uint32_t    MinVddcPhases;
+    uint32_t    SclkFrequency;
+    uint8_t     SclkDid;
+    uint8_t     DisplayWatermark;
+    uint8_t     DeepSleepDivId;
+    uint8_t     padding;
+    uint32_t    CgSpllFuncCntl;
+    uint32_t    CgSpllFuncCntl2;
+    uint32_t    CgSpllFuncCntl3;
+    uint32_t    CgSpllFuncCntl4;
+    uint32_t    SpllSpreadSpectrum;
+    uint32_t    SpllSpreadSpectrum2;
+    uint32_t    CcPwrDynRm;
+    uint32_t    CcPwrDynRm1;
+};
+
+typedef struct SMU7_Discrete_ACPILevel SMU7_Discrete_ACPILevel;
+
+struct SMU7_Discrete_Ulv
+{
+    uint32_t    CcPwrDynRm;
+    uint32_t    CcPwrDynRm1;
+    uint16_t    VddcOffset;
+    uint8_t     VddcOffsetVid;
+    uint8_t     VddcPhase;
+    uint32_t    Reserved;
+};
+
+typedef struct SMU7_Discrete_Ulv SMU7_Discrete_Ulv;
+
+struct SMU7_Discrete_MemoryLevel
+{
+    uint32_t    MinVddc;
+    uint32_t    MinVddcPhases;
+    uint32_t    MinVddci;
+    uint32_t    MinMvdd;
+
+    uint32_t    MclkFrequency;
+
+    uint8_t     EdcReadEnable;
+    uint8_t     EdcWriteEnable;
+    uint8_t     RttEnable;
+    uint8_t     StutterEnable;
+
+    uint8_t     StrobeEnable;
+    uint8_t     StrobeRatio;
+    uint8_t     EnabledForThrottle;
+    uint8_t     EnabledForActivity;
+
+    uint8_t     UpH;
+    uint8_t     DownH;
+    uint8_t     VoltageDownH;
+    uint8_t     padding;
+
+    uint16_t    ActivityLevel;
+    uint8_t     DisplayWatermark;
+    uint8_t     padding1;
+
+    uint32_t    MpllFuncCntl;
+    uint32_t    MpllFuncCntl_1;
+    uint32_t    MpllFuncCntl_2;
+    uint32_t    MpllAdFuncCntl;
+    uint32_t    MpllDqFuncCntl;
+    uint32_t    MclkPwrmgtCntl;
+    uint32_t    DllCntl;
+    uint32_t    MpllSs1;
+    uint32_t    MpllSs2;
+};
+
+typedef struct SMU7_Discrete_MemoryLevel SMU7_Discrete_MemoryLevel;
+
+struct SMU7_Discrete_LinkLevel
+{
+    uint8_t     PcieGenSpeed;
+    uint8_t     PcieLaneCount;
+    uint8_t     EnabledForActivity;
+    uint8_t     Padding;
+    uint32_t    DownT;
+    uint32_t    UpT;
+    uint32_t    Reserved;
+};
+
+typedef struct SMU7_Discrete_LinkLevel SMU7_Discrete_LinkLevel;
+
+
+struct SMU7_Discrete_MCArbDramTimingTableEntry
+{
+    uint32_t McArbDramTiming;
+    uint32_t McArbDramTiming2;
+    uint8_t  McArbBurstTime;
+    uint8_t  padding[3];
+};
+
+typedef struct SMU7_Discrete_MCArbDramTimingTableEntry SMU7_Discrete_MCArbDramTimingTableEntry;
+
+struct SMU7_Discrete_MCArbDramTimingTable
+{
+    SMU7_Discrete_MCArbDramTimingTableEntry entries[SMU__NUM_SCLK_DPM_STATE][SMU__NUM_MCLK_DPM_LEVELS];
+};
+
+typedef struct SMU7_Discrete_MCArbDramTimingTable SMU7_Discrete_MCArbDramTimingTable;
+
+struct SMU7_Discrete_UvdLevel
+{
+    uint32_t VclkFrequency;
+    uint32_t DclkFrequency;
+    uint16_t MinVddc;
+    uint8_t  MinVddcPhases;
+    uint8_t  VclkDivider;
+    uint8_t  DclkDivider;
+    uint8_t  padding[3];
+};
+
+typedef struct SMU7_Discrete_UvdLevel SMU7_Discrete_UvdLevel;
+
+struct SMU7_Discrete_ExtClkLevel
+{
+    uint32_t Frequency;
+    uint16_t MinVoltage;
+    uint8_t  MinPhases;
+    uint8_t  Divider;
+};
+
+typedef struct SMU7_Discrete_ExtClkLevel SMU7_Discrete_ExtClkLevel;
+
+struct SMU7_Discrete_StateInfo
+{
+    uint32_t SclkFrequency;
+    uint32_t MclkFrequency;
+    uint32_t VclkFrequency;
+    uint32_t DclkFrequency;
+    uint32_t SamclkFrequency;
+    uint32_t AclkFrequency;
+    uint32_t EclkFrequency;
+    uint16_t MvddVoltage;
+    uint16_t padding16;
+    uint8_t  DisplayWatermark;
+    uint8_t  McArbIndex;
+    uint8_t  McRegIndex;
+    uint8_t  SeqIndex;
+    uint8_t  SclkDid;
+    int8_t   SclkIndex;
+    int8_t   MclkIndex;
+    uint8_t  PCIeGen;
+
+};
+
+typedef struct SMU7_Discrete_StateInfo SMU7_Discrete_StateInfo;
+
+
+struct SMU7_Discrete_DpmTable
+{
+    SMU7_PIDController                  GraphicsPIDController;
+    SMU7_PIDController                  MemoryPIDController;
+    SMU7_PIDController                  LinkPIDController;
+
+    uint32_t                            SystemFlags;
+
+
+    uint32_t                            SmioMaskVddcVid;
+    uint32_t                            SmioMaskVddcPhase;
+    uint32_t                            SmioMaskVddciVid;
+    uint32_t                            SmioMaskMvddVid;
+
+    uint32_t                            VddcLevelCount;
+    uint32_t                            VddciLevelCount;
+    uint32_t                            MvddLevelCount;
+
+    SMU7_Discrete_VoltageLevel          VddcLevel               [SMU7_MAX_LEVELS_VDDC];
+//    SMU7_Discrete_VoltageLevel          VddcStandardReference   [SMU7_MAX_LEVELS_VDDC];
+    SMU7_Discrete_VoltageLevel          VddciLevel              [SMU7_MAX_LEVELS_VDDCI];
+    SMU7_Discrete_VoltageLevel          MvddLevel               [SMU7_MAX_LEVELS_MVDD];
+
+    uint8_t                             GraphicsDpmLevelCount;
+    uint8_t                             MemoryDpmLevelCount;
+    uint8_t                             LinkLevelCount;
+    uint8_t                             UvdLevelCount;
+    uint8_t                             VceLevelCount;
+    uint8_t                             AcpLevelCount;
+    uint8_t                             SamuLevelCount;
+    uint8_t                             MasterDeepSleepControl;
+    uint32_t                            Reserved[5];
+//    uint32_t                            SamuDefaultLevel;
+
+    SMU7_Discrete_GraphicsLevel         GraphicsLevel           [SMU7_MAX_LEVELS_GRAPHICS];
+    SMU7_Discrete_MemoryLevel           MemoryACPILevel;
+    SMU7_Discrete_MemoryLevel           MemoryLevel             [SMU7_MAX_LEVELS_MEMORY];
+    SMU7_Discrete_LinkLevel             LinkLevel               [SMU7_MAX_LEVELS_LINK];
+    SMU7_Discrete_ACPILevel             ACPILevel;
+    SMU7_Discrete_UvdLevel              UvdLevel                [SMU7_MAX_LEVELS_UVD];
+    SMU7_Discrete_ExtClkLevel           VceLevel                [SMU7_MAX_LEVELS_VCE];
+    SMU7_Discrete_ExtClkLevel           AcpLevel                [SMU7_MAX_LEVELS_ACP];
+    SMU7_Discrete_ExtClkLevel           SamuLevel               [SMU7_MAX_LEVELS_SAMU];
+    SMU7_Discrete_Ulv                   Ulv;
+
+    uint32_t                            SclkStepSize;
+    uint32_t                            Smio                    [SMU7_MAX_ENTRIES_SMIO];
+
+    uint8_t                             UvdBootLevel;
+    uint8_t                             VceBootLevel;
+    uint8_t                             AcpBootLevel;
+    uint8_t                             SamuBootLevel;
+
+    uint8_t                             UVDInterval;
+    uint8_t                             VCEInterval;
+    uint8_t                             ACPInterval;
+    uint8_t                             SAMUInterval;
+
+    uint8_t                             GraphicsBootLevel;
+    uint8_t                             GraphicsVoltageChangeEnable;
+    uint8_t                             GraphicsThermThrottleEnable;
+    uint8_t                             GraphicsInterval;
+
+    uint8_t                             VoltageInterval;
+    uint8_t                             ThermalInterval;
+    uint16_t                            TemperatureLimitHigh;
+
+    uint16_t                            TemperatureLimitLow;
+    uint8_t                             MemoryBootLevel;
+    uint8_t                             MemoryVoltageChangeEnable;
+
+    uint8_t                             MemoryInterval;
+    uint8_t                             MemoryThermThrottleEnable;
+    uint16_t                            VddcVddciDelta;
+
+    uint16_t                            VoltageResponseTime;
+    uint16_t                            PhaseResponseTime;
+
+    uint8_t                             PCIeBootLinkLevel;
+    uint8_t                             PCIeGenInterval;
+    uint8_t                             DTEInterval;
+    uint8_t                             DTEMode;
+
+    uint8_t                             SVI2Enable;
+    uint8_t                             VRHotGpio;
+    uint8_t                             AcDcGpio;
+    uint8_t                             ThermGpio;
+
+    uint16_t                            PPM_PkgPwrLimit;
+    uint16_t                            PPM_TemperatureLimit;
+
+    uint16_t                            DefaultTdp;
+    uint16_t                            TargetTdp;
+
+    uint16_t                            FpsHighT;
+    uint16_t                            FpsLowT;
+
+    uint16_t                            BAPMTI_R  [SMU7_DTE_ITERATIONS][SMU7_DTE_SOURCES][SMU7_DTE_SINKS];
+    uint16_t                            BAPMTI_RC [SMU7_DTE_ITERATIONS][SMU7_DTE_SOURCES][SMU7_DTE_SINKS];
+
+    uint8_t                             DTEAmbientTempBase;
+    uint8_t                             DTETjOffset;
+    uint8_t                             GpuTjMax;
+    uint8_t                             GpuTjHyst;
+
+    uint16_t                            BootVddc;
+    uint16_t                            BootVddci;
+
+    uint16_t                            BootMVdd;
+    uint16_t                            padding;
+
+    uint32_t                            BAPM_TEMP_GRADIENT;
+
+    uint32_t                            LowSclkInterruptT;
+};
+
+typedef struct SMU7_Discrete_DpmTable SMU7_Discrete_DpmTable;
+
+#define SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE 16
+#define SMU7_DISCRETE_MC_REGISTER_ARRAY_SET_COUNT SMU7_MAX_LEVELS_MEMORY
+
+struct SMU7_Discrete_MCRegisterAddress
+{
+    uint16_t s0;
+    uint16_t s1;
+};
+
+typedef struct SMU7_Discrete_MCRegisterAddress SMU7_Discrete_MCRegisterAddress;
+
+struct SMU7_Discrete_MCRegisterSet
+{
+    uint32_t value[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE];
+};
+
+typedef struct SMU7_Discrete_MCRegisterSet SMU7_Discrete_MCRegisterSet;
+
+struct SMU7_Discrete_MCRegisters
+{
+    uint8_t                             last;
+    uint8_t                             reserved[3];
+    SMU7_Discrete_MCRegisterAddress     address[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE];
+    SMU7_Discrete_MCRegisterSet         data[SMU7_DISCRETE_MC_REGISTER_ARRAY_SET_COUNT];
+};
+
+typedef struct SMU7_Discrete_MCRegisters SMU7_Discrete_MCRegisters;
+
+struct SMU7_Discrete_PmFuses {
+  // dw0-dw1
+  uint8_t BapmVddCVidHiSidd[8];
+
+  // dw2-dw3
+  uint8_t BapmVddCVidLoSidd[8];
+
+  // dw4-dw5
+  uint8_t VddCVid[8];
+
+  // dw6
+  uint8_t SviLoadLineEn;
+  uint8_t SviLoadLineVddC;
+  uint8_t SviLoadLineTrimVddC;
+  uint8_t SviLoadLineOffsetVddC;
+
+  // dw7
+  uint16_t TDC_VDDC_PkgLimit;
+  uint8_t TDC_VDDC_ThrottleReleaseLimitPerc;
+  uint8_t TDC_MAWt;
+
+  // dw8
+  uint8_t TdcWaterfallCtl;
+  uint8_t LPMLTemperatureMin;
+  uint8_t LPMLTemperatureMax;
+  uint8_t Reserved;
+
+  // dw9-dw10
+  uint8_t BapmVddCVidHiSidd2[8];
+
+  // dw11-dw12
+  uint32_t Reserved6[2];
+
+  // dw13-dw16
+  uint8_t GnbLPML[16];
+
+  // dw17
+  uint8_t GnbLPMLMaxVid;
+  uint8_t GnbLPMLMinVid;
+  uint8_t Reserved1[2];
+
+  // dw18
+  uint16_t BapmVddCBaseLeakageHiSidd;
+  uint16_t BapmVddCBaseLeakageLoSidd;
+};
+
+typedef struct SMU7_Discrete_PmFuses SMU7_Discrete_PmFuses;
+
+
+#pragma pack(pop)
+
+#endif
+
diff --git a/drivers/gpu/drm/radeon/smu7_fusion.h b/drivers/gpu/drm/radeon/smu7_fusion.h
new file mode 100644
index 0000000..78ada9f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/smu7_fusion.h
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef SMU7_FUSION_H
+#define SMU7_FUSION_H
+
+#include "smu7.h"
+
+#pragma pack(push, 1)
+
+#define SMU7_DTE_ITERATIONS 5
+#define SMU7_DTE_SOURCES 5
+#define SMU7_DTE_SINKS 3
+#define SMU7_NUM_CPU_TES 2
+#define SMU7_NUM_GPU_TES 1
+#define SMU7_NUM_NON_TES 2
+
+// All 'soft registers' should be uint32_t.
+struct SMU7_SoftRegisters
+{
+    uint32_t        RefClockFrequency;
+    uint32_t        PmTimerP;
+    uint32_t        FeatureEnables;
+    uint32_t        HandshakeDisables;
+
+    uint8_t         DisplayPhy1Config;
+    uint8_t         DisplayPhy2Config;
+    uint8_t         DisplayPhy3Config;
+    uint8_t         DisplayPhy4Config;
+
+    uint8_t         DisplayPhy5Config;
+    uint8_t         DisplayPhy6Config;
+    uint8_t         DisplayPhy7Config;
+    uint8_t         DisplayPhy8Config;
+
+    uint32_t        AverageGraphicsA;
+    uint32_t        AverageMemoryA;
+    uint32_t        AverageGioA;
+
+    uint8_t         SClkDpmEnabledLevels;
+    uint8_t         MClkDpmEnabledLevels;
+    uint8_t         LClkDpmEnabledLevels;
+    uint8_t         PCIeDpmEnabledLevels;
+
+    uint8_t         UVDDpmEnabledLevels;
+    uint8_t         SAMUDpmEnabledLevels;
+    uint8_t         ACPDpmEnabledLevels;
+    uint8_t         VCEDpmEnabledLevels;
+
+    uint32_t        DRAM_LOG_ADDR_H;
+    uint32_t        DRAM_LOG_ADDR_L;
+    uint32_t        DRAM_LOG_PHY_ADDR_H;
+    uint32_t        DRAM_LOG_PHY_ADDR_L;
+    uint32_t        DRAM_LOG_BUFF_SIZE;
+    uint32_t        UlvEnterC;
+    uint32_t        UlvTime;
+    uint32_t        Reserved[3];
+
+};
+
+typedef struct SMU7_SoftRegisters SMU7_SoftRegisters;
+
+struct SMU7_Fusion_GraphicsLevel
+{
+    uint32_t    MinVddNb;
+
+    uint32_t    SclkFrequency;
+
+    uint8_t     Vid;
+    uint8_t     VidOffset;
+    uint16_t    AT;
+
+    uint8_t     PowerThrottle;
+    uint8_t     GnbSlow;
+    uint8_t     ForceNbPs1;
+    uint8_t     SclkDid;
+
+    uint8_t     DisplayWatermark;
+    uint8_t     EnabledForActivity;
+    uint8_t     EnabledForThrottle;
+    uint8_t     UpH;
+
+    uint8_t     DownH;
+    uint8_t     VoltageDownH;
+    uint8_t     DeepSleepDivId;
+
+    uint8_t     ClkBypassCntl;
+
+    uint32_t    reserved;
+};
+
+typedef struct SMU7_Fusion_GraphicsLevel SMU7_Fusion_GraphicsLevel;
+
+struct SMU7_Fusion_GIOLevel
+{
+    uint8_t     EnabledForActivity;
+    uint8_t     LclkDid;
+    uint8_t     Vid;
+    uint8_t     VoltageDownH;
+
+    uint32_t    MinVddNb;
+
+    uint16_t    ResidencyCounter;
+    uint8_t     UpH;
+    uint8_t     DownH;
+
+    uint32_t    LclkFrequency;
+
+    uint8_t     ActivityLevel;
+    uint8_t     EnabledForThrottle;
+
+    uint8_t     ClkBypassCntl;
+
+    uint8_t     padding;
+};
+
+typedef struct SMU7_Fusion_GIOLevel SMU7_Fusion_GIOLevel;
+
+// UVD VCLK/DCLK state (level) definition.
+struct SMU7_Fusion_UvdLevel
+{
+    uint32_t VclkFrequency;
+    uint32_t DclkFrequency;
+    uint16_t MinVddNb;
+    uint8_t  VclkDivider;
+    uint8_t  DclkDivider;
+
+    uint8_t     VClkBypassCntl;
+    uint8_t     DClkBypassCntl;
+
+    uint8_t     padding[2];
+
+};
+
+typedef struct SMU7_Fusion_UvdLevel SMU7_Fusion_UvdLevel;
+
+// Clocks for other external blocks (VCE, ACP, SAMU).
+struct SMU7_Fusion_ExtClkLevel
+{
+    uint32_t Frequency;
+    uint16_t MinVoltage;
+    uint8_t  Divider;
+    uint8_t  ClkBypassCntl;
+
+    uint32_t Reserved;
+};
+typedef struct SMU7_Fusion_ExtClkLevel SMU7_Fusion_ExtClkLevel;
+
+struct SMU7_Fusion_ACPILevel
+{
+    uint32_t    Flags;
+    uint32_t    MinVddNb;
+    uint32_t    SclkFrequency;
+    uint8_t     SclkDid;
+    uint8_t     GnbSlow;
+    uint8_t     ForceNbPs1;
+    uint8_t     DisplayWatermark;
+    uint8_t     DeepSleepDivId;
+    uint8_t     padding[3];
+};
+
+typedef struct SMU7_Fusion_ACPILevel SMU7_Fusion_ACPILevel;
+
+struct SMU7_Fusion_NbDpm
+{
+    uint8_t DpmXNbPsHi;
+    uint8_t DpmXNbPsLo;
+    uint8_t Dpm0PgNbPsHi;
+    uint8_t Dpm0PgNbPsLo;
+    uint8_t EnablePsi1;
+    uint8_t SkipDPM0;
+    uint8_t SkipPG;
+    uint8_t Hysteresis;
+    uint8_t EnableDpmPstatePoll;
+    uint8_t padding[3];
+};
+
+typedef struct SMU7_Fusion_NbDpm SMU7_Fusion_NbDpm;
+
+struct SMU7_Fusion_StateInfo
+{
+    uint32_t SclkFrequency;
+    uint32_t LclkFrequency;
+    uint32_t VclkFrequency;
+    uint32_t DclkFrequency;
+    uint32_t SamclkFrequency;
+    uint32_t AclkFrequency;
+    uint32_t EclkFrequency;
+    uint8_t  DisplayWatermark;
+    uint8_t  McArbIndex;
+    int8_t   SclkIndex;
+    int8_t   MclkIndex;
+};
+
+typedef struct SMU7_Fusion_StateInfo SMU7_Fusion_StateInfo;
+
+struct SMU7_Fusion_DpmTable
+{
+    uint32_t                            SystemFlags;
+
+    SMU7_PIDController                  GraphicsPIDController;
+    SMU7_PIDController                  GioPIDController;
+
+    uint8_t                            GraphicsDpmLevelCount;
+    uint8_t                            GIOLevelCount;
+    uint8_t                            UvdLevelCount;
+    uint8_t                            VceLevelCount;
+
+    uint8_t                            AcpLevelCount;
+    uint8_t                            SamuLevelCount;
+    uint16_t                           FpsHighT;
+
+    SMU7_Fusion_GraphicsLevel         GraphicsLevel           [SMU__NUM_SCLK_DPM_STATE];
+    SMU7_Fusion_ACPILevel             ACPILevel;
+    SMU7_Fusion_UvdLevel              UvdLevel                [SMU7_MAX_LEVELS_UVD];
+    SMU7_Fusion_ExtClkLevel           VceLevel                [SMU7_MAX_LEVELS_VCE];
+    SMU7_Fusion_ExtClkLevel           AcpLevel                [SMU7_MAX_LEVELS_ACP];
+    SMU7_Fusion_ExtClkLevel           SamuLevel               [SMU7_MAX_LEVELS_SAMU];
+
+    uint8_t                           UvdBootLevel;
+    uint8_t                           VceBootLevel;
+    uint8_t                           AcpBootLevel;
+    uint8_t                           SamuBootLevel;
+    uint8_t                           UVDInterval;
+    uint8_t                           VCEInterval;
+    uint8_t                           ACPInterval;
+    uint8_t                           SAMUInterval;
+
+    uint8_t                           GraphicsBootLevel;
+    uint8_t                           GraphicsInterval;
+    uint8_t                           GraphicsThermThrottleEnable;
+    uint8_t                           GraphicsVoltageChangeEnable;
+
+    uint8_t                           GraphicsClkSlowEnable;
+    uint8_t                           GraphicsClkSlowDivider;
+    uint16_t                          FpsLowT;
+
+    uint32_t                          DisplayCac;
+    uint32_t                          LowSclkInterruptT;
+
+    uint32_t                          DRAM_LOG_ADDR_H;
+    uint32_t                          DRAM_LOG_ADDR_L;
+    uint32_t                          DRAM_LOG_PHY_ADDR_H;
+    uint32_t                          DRAM_LOG_PHY_ADDR_L;
+    uint32_t                          DRAM_LOG_BUFF_SIZE;
+
+};
+
+struct SMU7_Fusion_GIODpmTable
+{
+
+    SMU7_Fusion_GIOLevel              GIOLevel                [SMU7_MAX_LEVELS_GIO];
+
+    SMU7_PIDController                GioPIDController;
+
+    uint32_t                          GIOLevelCount;
+
+    uint8_t                           Enable;
+    uint8_t                           GIOVoltageChangeEnable;
+    uint8_t                           GIOBootLevel;
+    uint8_t                           padding;
+    uint8_t                           padding1[2];
+    uint8_t                           TargetState;
+    uint8_t                           CurrenttState;
+    uint8_t                           ThrottleOnHtc;
+    uint8_t                           ThermThrottleStatus;
+    uint8_t                           ThermThrottleTempSelect;
+    uint8_t                           ThermThrottleEnable;
+    uint16_t                          TemperatureLimitHigh;
+    uint16_t                          TemperatureLimitLow;
+
+};
+
+typedef struct SMU7_Fusion_DpmTable SMU7_Fusion_DpmTable;
+typedef struct SMU7_Fusion_GIODpmTable SMU7_Fusion_GIODpmTable;
+
+#pragma pack(pop)
+
+#endif
+
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index c0a8503..864761c 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -1483,6 +1483,7 @@
 	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
 	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
 	for (i = 0; i < state_array->ucNumEntries; i++) {
+		u8 *idx;
 		power_state = (union pplib_power_state *)power_state_offset;
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
@@ -1496,12 +1497,15 @@
 		}
 		rdev->pm.dpm.ps[i].ps_priv = ps;
 		k = 0;
+		idx = (u8 *)&power_state->v2.clockInfoIndex[0];
 		for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) {
-			clock_array_index = power_state->v2.clockInfoIndex[j];
+			clock_array_index = idx[j];
 			if (k >= SUMO_MAX_HARDWARE_POWERLEVELS)
 				break;
+
 			clock_info = (union pplib_clock_info *)
-				&clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+				((u8 *)&clock_info_array->clockInfo[0] +
+				 (clock_array_index * clock_info_array->ucEntrySize));
 			sumo_parse_pplib_clock_info(rdev,
 						    &rdev->pm.dpm.ps[i], k,
 						    clock_info);
@@ -1530,6 +1534,20 @@
 	return vid_mapping_table->entries[vid_mapping_table->num_entries - 1].vid_7bit;
 }
 
+u32 sumo_convert_vid7_to_vid2(struct radeon_device *rdev,
+			      struct sumo_vid_mapping_table *vid_mapping_table,
+			      u32 vid_7bit)
+{
+	u32 i;
+
+	for (i = 0; i < vid_mapping_table->num_entries; i++) {
+		if (vid_mapping_table->entries[i].vid_7bit == vid_7bit)
+			return vid_mapping_table->entries[i].vid_2bit;
+	}
+
+	return vid_mapping_table->entries[vid_mapping_table->num_entries - 1].vid_2bit;
+}
+
 static u16 sumo_convert_voltage_index_to_value(struct radeon_device *rdev,
 					       u32 vid_2bit)
 {
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.h b/drivers/gpu/drm/radeon/sumo_dpm.h
index 07dda29..db1ea32 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.h
+++ b/drivers/gpu/drm/radeon/sumo_dpm.h
@@ -202,6 +202,9 @@
 u32 sumo_convert_vid2_to_vid7(struct radeon_device *rdev,
 			      struct sumo_vid_mapping_table *vid_mapping_table,
 			      u32 vid_2bit);
+u32 sumo_convert_vid7_to_vid2(struct radeon_device *rdev,
+			      struct sumo_vid_mapping_table *vid_mapping_table,
+			      u32 vid_7bit);
 u32 sumo_get_sleep_divider_from_id(u32 id);
 u32 sumo_get_sleep_divider_id_from_clock(struct radeon_device *rdev,
 					 u32 sclk,
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index a1eb5f5..b07b7b8 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -1675,6 +1675,7 @@
 	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
 	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
 	for (i = 0; i < state_array->ucNumEntries; i++) {
+		u8 *idx;
 		power_state = (union pplib_power_state *)power_state_offset;
 		non_clock_array_index = power_state->v2.nonClockInfoIndex;
 		non_clock_info = (struct _ATOM_PPLIB_NONCLOCK_INFO *)
@@ -1688,14 +1689,16 @@
 		}
 		rdev->pm.dpm.ps[i].ps_priv = ps;
 		k = 0;
+		idx = (u8 *)&power_state->v2.clockInfoIndex[0];
 		for (j = 0; j < power_state->v2.ucNumDPMLevels; j++) {
-			clock_array_index = power_state->v2.clockInfoIndex[j];
+			clock_array_index = idx[j];
 			if (clock_array_index >= clock_info_array->ucNumEntries)
 				continue;
 			if (k >= SUMO_MAX_HARDWARE_POWERLEVELS)
 				break;
 			clock_info = (union pplib_clock_info *)
-				&clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+				((u8 *)&clock_info_array->clockInfo[0] +
+				 (clock_array_index * clock_info_array->ucEntrySize));
 			trinity_parse_pplib_clock_info(rdev,
 						       &rdev->pm.dpm.ps[i], k,
 						       clock_info);
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c
new file mode 100644
index 0000000..7266805
--- /dev/null
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "r600d.h"
+
+/**
+ * uvd_v1_0_get_rptr - get read pointer
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+uint32_t uvd_v1_0_get_rptr(struct radeon_device *rdev,
+			   struct radeon_ring *ring)
+{
+	return RREG32(UVD_RBC_RB_RPTR);
+}
+
+/**
+ * uvd_v1_0_get_wptr - get write pointer
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
+			   struct radeon_ring *ring)
+{
+	return RREG32(UVD_RBC_RB_WPTR);
+}
+
+/**
+ * uvd_v1_0_set_wptr - set write pointer
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+void uvd_v1_0_set_wptr(struct radeon_device *rdev,
+		       struct radeon_ring *ring)
+{
+	WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+}
+
+/**
+ * uvd_v1_0_init - start and test UVD block
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+int uvd_v1_0_init(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	uint32_t tmp;
+	int r;
+
+	/* raise clocks while booting up the VCPU */
+	radeon_set_uvd_clocks(rdev, 53300, 40000);
+
+	r = uvd_v1_0_start(rdev);
+	if (r)
+		goto done;
+
+	ring->ready = true;
+	r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
+	if (r) {
+		ring->ready = false;
+		goto done;
+	}
+
+	r = radeon_ring_lock(rdev, ring, 10);
+	if (r) {
+		DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
+		goto done;
+	}
+
+	tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	/* Clear timeout status bits */
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
+	radeon_ring_write(ring, 0x8);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
+	radeon_ring_write(ring, 3);
+
+	radeon_ring_unlock_commit(rdev, ring);
+
+done:
+	/* lower clocks again */
+	radeon_set_uvd_clocks(rdev, 0, 0);
+
+	if (!r)
+		DRM_INFO("UVD initialized successfully.\n");
+
+	return r;
+}
+
+/**
+ * uvd_v1_0_fini - stop the hardware block
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Stop the UVD block, mark ring as not ready any more
+ */
+void uvd_v1_0_fini(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+
+	uvd_v1_0_stop(rdev);
+	ring->ready = false;
+}
+
+/**
+ * uvd_v1_0_start - start UVD block
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Setup and start the UVD block
+ */
+int uvd_v1_0_start(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	uint32_t rb_bufsz;
+	int i, j, r;
+
+	/* disable byte swapping */
+	u32 lmi_swap_cntl = 0;
+	u32 mp_swap_cntl = 0;
+
+	/* disable clock gating */
+	WREG32(UVD_CGC_GATE, 0);
+
+	/* disable interupt */
+	WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
+
+	/* Stall UMC and register bus before resetting VCPU */
+	WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+	WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+	mdelay(1);
+
+	/* put LMI, VCPU, RBC etc... into reset */
+	WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
+	       LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
+	       CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
+	mdelay(5);
+
+	/* take UVD block out of reset */
+	WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
+	mdelay(5);
+
+	/* initialize UVD memory controller */
+	WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+			     (1 << 21) | (1 << 9) | (1 << 20));
+
+#ifdef __BIG_ENDIAN
+	/* swap (8 in 32) RB and IB */
+	lmi_swap_cntl = 0xa;
+	mp_swap_cntl = 0;
+#endif
+	WREG32(UVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+	WREG32(UVD_MP_SWAP_CNTL, mp_swap_cntl);
+
+	WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
+	WREG32(UVD_MPC_SET_MUXA1, 0x0);
+	WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
+	WREG32(UVD_MPC_SET_MUXB1, 0x0);
+	WREG32(UVD_MPC_SET_ALU, 0);
+	WREG32(UVD_MPC_SET_MUX, 0x88);
+
+	/* take all subblocks out of reset, except VCPU */
+	WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+	mdelay(5);
+
+	/* enable VCPU clock */
+	WREG32(UVD_VCPU_CNTL,  1 << 9);
+
+	/* enable UMC */
+	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+
+	/* boot up the VCPU */
+	WREG32(UVD_SOFT_RESET, 0);
+	mdelay(10);
+
+	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+
+	for (i = 0; i < 10; ++i) {
+		uint32_t status;
+		for (j = 0; j < 100; ++j) {
+			status = RREG32(UVD_STATUS);
+			if (status & 2)
+				break;
+			mdelay(10);
+		}
+		r = 0;
+		if (status & 2)
+			break;
+
+		DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+		WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
+		mdelay(10);
+		WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
+		mdelay(10);
+		r = -1;
+	}
+
+	if (r) {
+		DRM_ERROR("UVD not responding, giving up!!!\n");
+		return r;
+	}
+
+	/* enable interupt */
+	WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
+
+	/* force RBC into idle state */
+	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+
+	/* Set the write pointer delay */
+	WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
+
+	/* programm the 4GB memory segment for rptr and ring buffer */
+	WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) |
+				   (0x7 << 16) | (0x1 << 31));
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(UVD_RBC_RB_RPTR, 0x0);
+
+	ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
+	WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+
+	/* set the ring address */
+	WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
+
+	/* Set ring buffer size */
+	rb_bufsz = order_base_2(ring->ring_size);
+	rb_bufsz = (0x1 << 8) | rb_bufsz;
+	WREG32_P(UVD_RBC_RB_CNTL, rb_bufsz, ~0x11f1f);
+
+	return 0;
+}
+
+/**
+ * uvd_v1_0_stop - stop UVD block
+ *
+ * @rdev: radeon_device pointer
+ *
+ * stop the UVD block
+ */
+void uvd_v1_0_stop(struct radeon_device *rdev)
+{
+	/* force RBC into idle state */
+	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+
+	/* Stall UMC and register bus before resetting VCPU */
+	WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+	WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+	mdelay(1);
+
+	/* put VCPU into reset */
+	WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+	mdelay(5);
+
+	/* disable VCPU clock */
+	WREG32(UVD_VCPU_CNTL, 0x0);
+
+	/* Unstall UMC and register bus */
+	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+}
+
+/**
+ * uvd_v1_0_ring_test - register write test
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring pointer
+ *
+ * Test if we can successfully write to the context register
+ */
+int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
+	r = radeon_ring_lock(rdev, ring, 3);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
+			  ring->idx, r);
+		return r;
+	}
+	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(UVD_CONTEXT_ID);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n",
+			 ring->idx, i);
+	} else {
+		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+			  ring->idx, tmp);
+		r = -EINVAL;
+	}
+	return r;
+}
+
+/**
+ * uvd_v1_0_semaphore_emit - emit semaphore command
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring pointer
+ * @semaphore: semaphore to emit commands for
+ * @emit_wait: true if we should emit a wait command
+ *
+ * Emit a semaphore command (either wait or signal) to the UVD ring.
+ */
+void uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
+			     struct radeon_ring *ring,
+			     struct radeon_semaphore *semaphore,
+			     bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+	radeon_ring_write(ring, emit_wait ? 1 : 0);
+}
+
+/**
+ * uvd_v1_0_ib_execute - execute indirect buffer
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+void uvd_v1_0_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
+	radeon_ring_write(ring, ib->gpu_addr);
+	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
+	radeon_ring_write(ring, ib->length_dw);
+}
+
+/**
+ * uvd_v1_0_ib_test - test ib execution
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring pointer
+ *
+ * Test if we can successfully execute an IB
+ */
+int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_fence *fence = NULL;
+	int r;
+
+	r = radeon_set_uvd_clocks(rdev, 53300, 40000);
+	if (r) {
+		DRM_ERROR("radeon: failed to raise UVD clocks (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+	if (r) {
+		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
+		goto error;
+	}
+
+	r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
+	if (r) {
+		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
+		goto error;
+	}
+
+	r = radeon_fence_wait(fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+		goto error;
+	}
+	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+error:
+	radeon_fence_unref(&fence);
+	radeon_set_uvd_clocks(rdev, 0, 0);
+	return r;
+}
diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c
new file mode 100644
index 0000000..b19ef49
--- /dev/null
+++ b/drivers/gpu/drm/radeon/uvd_v2_2.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "rv770d.h"
+
+/**
+ * uvd_v2_2_fence_emit - emit an fence & trap command
+ *
+ * @rdev: radeon_device pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void uvd_v2_2_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 2);
+	return;
+}
+
+/**
+ * uvd_v2_2_resume - memory controller programming
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+int uvd_v2_2_resume(struct radeon_device *rdev)
+{
+	uint64_t addr;
+	uint32_t chip_id, size;
+	int r;
+
+	r = radeon_uvd_resume(rdev);
+	if (r)
+		return r;
+
+	/* programm the VCPU memory controller bits 0-27 */
+	addr = rdev->uvd.gpu_addr >> 3;
+	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE0, size);
+
+	addr += size;
+	size = RADEON_UVD_STACK_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE1, size);
+
+	addr += size;
+	size = RADEON_UVD_HEAP_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE2, size);
+
+	/* bits 28-31 */
+	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
+	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+	/* bits 32-39 */
+	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
+	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+	/* tell firmware which hardware it is running on */
+	switch (rdev->family) {
+	default:
+		return -EINVAL;
+	case CHIP_RV710:
+		chip_id = 0x01000005;
+		break;
+	case CHIP_RV730:
+		chip_id = 0x01000006;
+		break;
+	case CHIP_RV740:
+		chip_id = 0x01000007;
+		break;
+	case CHIP_CYPRESS:
+	case CHIP_HEMLOCK:
+		chip_id = 0x01000008;
+		break;
+	case CHIP_JUNIPER:
+		chip_id = 0x01000009;
+		break;
+	case CHIP_REDWOOD:
+		chip_id = 0x0100000a;
+		break;
+	case CHIP_CEDAR:
+		chip_id = 0x0100000b;
+		break;
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
+		chip_id = 0x0100000c;
+		break;
+	case CHIP_PALM:
+		chip_id = 0x0100000e;
+		break;
+	case CHIP_CAYMAN:
+		chip_id = 0x0100000f;
+		break;
+	case CHIP_BARTS:
+		chip_id = 0x01000010;
+		break;
+	case CHIP_TURKS:
+		chip_id = 0x01000011;
+		break;
+	case CHIP_CAICOS:
+		chip_id = 0x01000012;
+		break;
+	case CHIP_TAHITI:
+		chip_id = 0x01000014;
+		break;
+	case CHIP_VERDE:
+		chip_id = 0x01000015;
+		break;
+	case CHIP_PITCAIRN:
+		chip_id = 0x01000016;
+		break;
+	case CHIP_ARUBA:
+		chip_id = 0x01000017;
+		break;
+	}
+	WREG32(UVD_VCPU_CHIP_ID, chip_id);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/uvd_v3_1.c b/drivers/gpu/drm/radeon/uvd_v3_1.c
new file mode 100644
index 0000000..5b6fa1f
--- /dev/null
+++ b/drivers/gpu/drm/radeon/uvd_v3_1.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "nid.h"
+
+/**
+ * uvd_v3_1_semaphore_emit - emit semaphore command
+ *
+ * @rdev: radeon_device pointer
+ * @ring: radeon_ring pointer
+ * @semaphore: semaphore to emit commands for
+ * @emit_wait: true if we should emit a wait command
+ *
+ * Emit a semaphore command (either wait or signal) to the UVD ring.
+ */
+void uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
+			     struct radeon_ring *ring,
+			     struct radeon_semaphore *semaphore,
+			     bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+	radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
+}
diff --git a/drivers/gpu/drm/radeon/uvd_v4_2.c b/drivers/gpu/drm/radeon/uvd_v4_2.c
new file mode 100644
index 0000000..d04d507
--- /dev/null
+++ b/drivers/gpu/drm/radeon/uvd_v4_2.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "cikd.h"
+
+/**
+ * uvd_v4_2_resume - memory controller programming
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+int uvd_v4_2_resume(struct radeon_device *rdev)
+{
+	uint64_t addr;
+	uint32_t size;
+
+	/* programm the VCPU memory controller bits 0-27 */
+	addr = rdev->uvd.gpu_addr >> 3;
+	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE0, size);
+
+	addr += size;
+	size = RADEON_UVD_STACK_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE1, size);
+
+	addr += size;
+	size = RADEON_UVD_HEAP_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE2, size);
+
+	/* bits 28-31 */
+	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
+	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+	/* bits 32-39 */
+	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
+	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
index 72887df..c590cd9 100644
--- a/drivers/gpu/drm/rcar-du/Kconfig
+++ b/drivers/gpu/drm/rcar-du/Kconfig
@@ -7,3 +7,10 @@
 	help
 	  Choose this option if you have an R-Car chipset.
 	  If M is selected the module will be called rcar-du-drm.
+
+config DRM_RCAR_LVDS
+	bool "R-Car DU LVDS Encoder Support"
+	depends on DRM_RCAR_DU
+	help
+	  Enable support the R-Car Display Unit embedded LVDS encoders
+	  (currently only on R8A7790).
diff --git a/drivers/gpu/drm/rcar-du/Makefile b/drivers/gpu/drm/rcar-du/Makefile
index 7333c00..12b8d44 100644
--- a/drivers/gpu/drm/rcar-du/Makefile
+++ b/drivers/gpu/drm/rcar-du/Makefile
@@ -1,8 +1,12 @@
 rcar-du-drm-y := rcar_du_crtc.o \
 		 rcar_du_drv.o \
+		 rcar_du_encoder.o \
+		 rcar_du_group.o \
 		 rcar_du_kms.o \
-		 rcar_du_lvds.o \
+		 rcar_du_lvdscon.o \
 		 rcar_du_plane.o \
-		 rcar_du_vga.o
+		 rcar_du_vgacon.o
 
-obj-$(CONFIG_DRM_RCAR_DU)	+= rcar-du-drm.o
+rcar-du-drm-$(CONFIG_DRM_RCAR_LVDS)	+= rcar_du_lvdsenc.o
+
+obj-$(CONFIG_DRM_RCAR_DU)		+= rcar-du-drm.o
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index 24183fb..a9d24e4 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -23,30 +23,26 @@
 #include "rcar_du_crtc.h"
 #include "rcar_du_drv.h"
 #include "rcar_du_kms.h"
-#include "rcar_du_lvds.h"
 #include "rcar_du_plane.h"
 #include "rcar_du_regs.h"
-#include "rcar_du_vga.h"
-
-#define to_rcar_crtc(c)	container_of(c, struct rcar_du_crtc, crtc)
 
 static u32 rcar_du_crtc_read(struct rcar_du_crtc *rcrtc, u32 reg)
 {
-	struct rcar_du_device *rcdu = rcrtc->crtc.dev->dev_private;
+	struct rcar_du_device *rcdu = rcrtc->group->dev;
 
 	return rcar_du_read(rcdu, rcrtc->mmio_offset + reg);
 }
 
 static void rcar_du_crtc_write(struct rcar_du_crtc *rcrtc, u32 reg, u32 data)
 {
-	struct rcar_du_device *rcdu = rcrtc->crtc.dev->dev_private;
+	struct rcar_du_device *rcdu = rcrtc->group->dev;
 
 	rcar_du_write(rcdu, rcrtc->mmio_offset + reg, data);
 }
 
 static void rcar_du_crtc_clr(struct rcar_du_crtc *rcrtc, u32 reg, u32 clr)
 {
-	struct rcar_du_device *rcdu = rcrtc->crtc.dev->dev_private;
+	struct rcar_du_device *rcdu = rcrtc->group->dev;
 
 	rcar_du_write(rcdu, rcrtc->mmio_offset + reg,
 		      rcar_du_read(rcdu, rcrtc->mmio_offset + reg) & ~clr);
@@ -54,7 +50,7 @@
 
 static void rcar_du_crtc_set(struct rcar_du_crtc *rcrtc, u32 reg, u32 set)
 {
-	struct rcar_du_device *rcdu = rcrtc->crtc.dev->dev_private;
+	struct rcar_du_device *rcdu = rcrtc->group->dev;
 
 	rcar_du_write(rcdu, rcrtc->mmio_offset + reg,
 		      rcar_du_read(rcdu, rcrtc->mmio_offset + reg) | set);
@@ -63,29 +59,48 @@
 static void rcar_du_crtc_clr_set(struct rcar_du_crtc *rcrtc, u32 reg,
 				 u32 clr, u32 set)
 {
-	struct rcar_du_device *rcdu = rcrtc->crtc.dev->dev_private;
+	struct rcar_du_device *rcdu = rcrtc->group->dev;
 	u32 value = rcar_du_read(rcdu, rcrtc->mmio_offset + reg);
 
 	rcar_du_write(rcdu, rcrtc->mmio_offset + reg, (value & ~clr) | set);
 }
 
+static int rcar_du_crtc_get(struct rcar_du_crtc *rcrtc)
+{
+	int ret;
+
+	ret = clk_prepare_enable(rcrtc->clock);
+	if (ret < 0)
+		return ret;
+
+	ret = rcar_du_group_get(rcrtc->group);
+	if (ret < 0)
+		clk_disable_unprepare(rcrtc->clock);
+
+	return ret;
+}
+
+static void rcar_du_crtc_put(struct rcar_du_crtc *rcrtc)
+{
+	rcar_du_group_put(rcrtc->group);
+	clk_disable_unprepare(rcrtc->clock);
+}
+
 static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc)
 {
-	struct drm_crtc *crtc = &rcrtc->crtc;
-	struct rcar_du_device *rcdu = crtc->dev->dev_private;
-	const struct drm_display_mode *mode = &crtc->mode;
+	const struct drm_display_mode *mode = &rcrtc->crtc.mode;
 	unsigned long clk;
 	u32 value;
 	u32 div;
 
 	/* Dot clock */
-	clk = clk_get_rate(rcdu->clock);
+	clk = clk_get_rate(rcrtc->clock);
 	div = DIV_ROUND_CLOSEST(clk, mode->clock * 1000);
 	div = clamp(div, 1U, 64U) - 1;
 
-	rcar_du_write(rcdu, rcrtc->index ? ESCR2 : ESCR,
-		      ESCR_DCLKSEL_CLKS | div);
-	rcar_du_write(rcdu, rcrtc->index ? OTAR2 : OTAR, 0);
+	rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? ESCR2 : ESCR,
+			    ESCR_DCLKSEL_CLKS | div);
+	rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? OTAR2 : OTAR, 0);
 
 	/* Signal polarities */
 	value = ((mode->flags & DRM_MODE_FLAG_PVSYNC) ? 0 : DSMR_VSL)
@@ -112,68 +127,25 @@
 	rcar_du_crtc_write(rcrtc, DEWR,  mode->hdisplay);
 }
 
-static void rcar_du_crtc_set_routing(struct rcar_du_crtc *rcrtc)
-{
-	struct rcar_du_device *rcdu = rcrtc->crtc.dev->dev_private;
-	u32 dorcr = rcar_du_read(rcdu, DORCR);
-
-	dorcr &= ~(DORCR_PG2T | DORCR_DK2S | DORCR_PG2D_MASK);
-
-	/* Set the DU1 pins sources. Select CRTC 0 if explicitly requested and
-	 * CRTC 1 in all other cases to avoid cloning CRTC 0 to DU0 and DU1 by
-	 * default.
-	 */
-	if (rcrtc->outputs & (1 << 1) && rcrtc->index == 0)
-		dorcr |= DORCR_PG2D_DS1;
-	else
-		dorcr |= DORCR_PG2T | DORCR_DK2S | DORCR_PG2D_DS2;
-
-	rcar_du_write(rcdu, DORCR, dorcr);
-}
-
-static void __rcar_du_start_stop(struct rcar_du_device *rcdu, bool start)
-{
-	rcar_du_write(rcdu, DSYSR,
-		      (rcar_du_read(rcdu, DSYSR) & ~(DSYSR_DRES | DSYSR_DEN)) |
-		      (start ? DSYSR_DEN : DSYSR_DRES));
-}
-
-static void rcar_du_start_stop(struct rcar_du_device *rcdu, bool start)
-{
-	/* Many of the configuration bits are only updated when the display
-	 * reset (DRES) bit in DSYSR is set to 1, disabling *both* CRTCs. Some
-	 * of those bits could be pre-configured, but others (especially the
-	 * bits related to plane assignment to display timing controllers) need
-	 * to be modified at runtime.
-	 *
-	 * Restart the display controller if a start is requested. Sorry for the
-	 * flicker. It should be possible to move most of the "DRES-update" bits
-	 * setup to driver initialization time and minimize the number of cases
-	 * when the display controller will have to be restarted.
-	 */
-	if (start) {
-		if (rcdu->used_crtcs++ != 0)
-			__rcar_du_start_stop(rcdu, false);
-		__rcar_du_start_stop(rcdu, true);
-	} else {
-		if (--rcdu->used_crtcs == 0)
-			__rcar_du_start_stop(rcdu, false);
-	}
-}
-
-void rcar_du_crtc_route_output(struct drm_crtc *crtc, unsigned int output)
+void rcar_du_crtc_route_output(struct drm_crtc *crtc,
+			       enum rcar_du_output output)
 {
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
+	struct rcar_du_device *rcdu = rcrtc->group->dev;
 
 	/* Store the route from the CRTC output to the DU output. The DU will be
 	 * configured when starting the CRTC.
 	 */
-	rcrtc->outputs |= 1 << output;
+	rcrtc->outputs |= BIT(output);
+
+	/* Store RGB routing to DPAD0 for R8A7790. */
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_DEFR8) &&
+	    output == RCAR_DU_OUTPUT_DPAD0)
+		rcdu->dpad0_source = rcrtc->index;
 }
 
 void rcar_du_crtc_update_planes(struct drm_crtc *crtc)
 {
-	struct rcar_du_device *rcdu = crtc->dev->dev_private;
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
 	struct rcar_du_plane *planes[RCAR_DU_NUM_HW_PLANES];
 	unsigned int num_planes = 0;
@@ -182,8 +154,8 @@
 	u32 dptsr = 0;
 	u32 dspr = 0;
 
-	for (i = 0; i < ARRAY_SIZE(rcdu->planes.planes); ++i) {
-		struct rcar_du_plane *plane = &rcdu->planes.planes[i];
+	for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes.planes); ++i) {
+		struct rcar_du_plane *plane = &rcrtc->group->planes.planes[i];
 		unsigned int j;
 
 		if (plane->crtc != &rcrtc->crtc || !plane->enabled)
@@ -220,8 +192,8 @@
 	/* Select display timing and dot clock generator 2 for planes associated
 	 * with superposition controller 2.
 	 */
-	if (rcrtc->index) {
-		u32 value = rcar_du_read(rcdu, DPTSR);
+	if (rcrtc->index % 2) {
+		u32 value = rcar_du_group_read(rcrtc->group, DPTSR);
 
 		/* The DPTSR register is updated when the display controller is
 		 * stopped. We thus need to restart the DU. Once again, sorry
@@ -231,21 +203,19 @@
 		 * occur only if we need to break the pre-association.
 		 */
 		if (value != dptsr) {
-			rcar_du_write(rcdu, DPTSR, dptsr);
-			if (rcdu->used_crtcs) {
-				__rcar_du_start_stop(rcdu, false);
-				__rcar_du_start_stop(rcdu, true);
-			}
+			rcar_du_group_write(rcrtc->group, DPTSR, dptsr);
+			if (rcrtc->group->used_crtcs)
+				rcar_du_group_restart(rcrtc->group);
 		}
 	}
 
-	rcar_du_write(rcdu, rcrtc->index ? DS2PR : DS1PR, dspr);
+	rcar_du_group_write(rcrtc->group, rcrtc->index % 2 ? DS2PR : DS1PR,
+			    dspr);
 }
 
 static void rcar_du_crtc_start(struct rcar_du_crtc *rcrtc)
 {
 	struct drm_crtc *crtc = &rcrtc->crtc;
-	struct rcar_du_device *rcdu = crtc->dev->dev_private;
 	unsigned int i;
 
 	if (rcrtc->started)
@@ -260,16 +230,16 @@
 
 	/* Configure display timings and output routing */
 	rcar_du_crtc_set_display_timing(rcrtc);
-	rcar_du_crtc_set_routing(rcrtc);
+	rcar_du_group_set_routing(rcrtc->group);
 
-	mutex_lock(&rcdu->planes.lock);
+	mutex_lock(&rcrtc->group->planes.lock);
 	rcrtc->plane->enabled = true;
 	rcar_du_crtc_update_planes(crtc);
-	mutex_unlock(&rcdu->planes.lock);
+	mutex_unlock(&rcrtc->group->planes.lock);
 
 	/* Setup planes. */
-	for (i = 0; i < ARRAY_SIZE(rcdu->planes.planes); ++i) {
-		struct rcar_du_plane *plane = &rcdu->planes.planes[i];
+	for (i = 0; i < ARRAY_SIZE(rcrtc->group->planes.planes); ++i) {
+		struct rcar_du_plane *plane = &rcrtc->group->planes.planes[i];
 
 		if (plane->crtc != crtc || !plane->enabled)
 			continue;
@@ -283,7 +253,7 @@
 	 */
 	rcar_du_crtc_clr_set(rcrtc, DSYSR, DSYSR_TVM_MASK, DSYSR_TVM_MASTER);
 
-	rcar_du_start_stop(rcdu, true);
+	rcar_du_group_start_stop(rcrtc->group, true);
 
 	rcrtc->started = true;
 }
@@ -291,42 +261,37 @@
 static void rcar_du_crtc_stop(struct rcar_du_crtc *rcrtc)
 {
 	struct drm_crtc *crtc = &rcrtc->crtc;
-	struct rcar_du_device *rcdu = crtc->dev->dev_private;
 
 	if (!rcrtc->started)
 		return;
 
-	mutex_lock(&rcdu->planes.lock);
+	mutex_lock(&rcrtc->group->planes.lock);
 	rcrtc->plane->enabled = false;
 	rcar_du_crtc_update_planes(crtc);
-	mutex_unlock(&rcdu->planes.lock);
+	mutex_unlock(&rcrtc->group->planes.lock);
 
 	/* Select switch sync mode. This stops display operation and configures
 	 * the HSYNC and VSYNC signals as inputs.
 	 */
 	rcar_du_crtc_clr_set(rcrtc, DSYSR, DSYSR_TVM_MASK, DSYSR_TVM_SWITCH);
 
-	rcar_du_start_stop(rcdu, false);
+	rcar_du_group_start_stop(rcrtc->group, false);
 
 	rcrtc->started = false;
 }
 
 void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc)
 {
-	struct rcar_du_device *rcdu = rcrtc->crtc.dev->dev_private;
-
 	rcar_du_crtc_stop(rcrtc);
-	rcar_du_put(rcdu);
+	rcar_du_crtc_put(rcrtc);
 }
 
 void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc)
 {
-	struct rcar_du_device *rcdu = rcrtc->crtc.dev->dev_private;
-
 	if (rcrtc->dpms != DRM_MODE_DPMS_ON)
 		return;
 
-	rcar_du_get(rcdu);
+	rcar_du_crtc_get(rcrtc);
 	rcar_du_crtc_start(rcrtc);
 }
 
@@ -340,18 +305,17 @@
 
 static void rcar_du_crtc_dpms(struct drm_crtc *crtc, int mode)
 {
-	struct rcar_du_device *rcdu = crtc->dev->dev_private;
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
 
 	if (rcrtc->dpms == mode)
 		return;
 
 	if (mode == DRM_MODE_DPMS_ON) {
-		rcar_du_get(rcdu);
+		rcar_du_crtc_get(rcrtc);
 		rcar_du_crtc_start(rcrtc);
 	} else {
 		rcar_du_crtc_stop(rcrtc);
-		rcar_du_put(rcdu);
+		rcar_du_crtc_put(rcrtc);
 	}
 
 	rcrtc->dpms = mode;
@@ -367,13 +331,12 @@
 
 static void rcar_du_crtc_mode_prepare(struct drm_crtc *crtc)
 {
-	struct rcar_du_device *rcdu = crtc->dev->dev_private;
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
 
 	/* We need to access the hardware during mode set, acquire a reference
-	 * to the DU.
+	 * to the CRTC.
 	 */
-	rcar_du_get(rcdu);
+	rcar_du_crtc_get(rcrtc);
 
 	/* Stop the CRTC and release the plane. Force the DPMS mode to off as a
 	 * result.
@@ -390,8 +353,8 @@
 				 int x, int y,
 				 struct drm_framebuffer *old_fb)
 {
-	struct rcar_du_device *rcdu = crtc->dev->dev_private;
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
+	struct rcar_du_device *rcdu = rcrtc->group->dev;
 	const struct rcar_du_format_info *format;
 	int ret;
 
@@ -423,10 +386,10 @@
 
 error:
 	/* There's no rollback/abort operation to clean up in case of error. We
-	 * thus need to release the reference to the DU acquired in prepare()
+	 * thus need to release the reference to the CRTC acquired in prepare()
 	 * here.
 	 */
-	rcar_du_put(rcdu);
+	rcar_du_crtc_put(rcrtc);
 	return ret;
 }
 
@@ -514,9 +477,28 @@
 	drm_vblank_put(dev, rcrtc->index);
 }
 
+static irqreturn_t rcar_du_crtc_irq(int irq, void *arg)
+{
+	struct rcar_du_crtc *rcrtc = arg;
+	irqreturn_t ret = IRQ_NONE;
+	u32 status;
+
+	status = rcar_du_crtc_read(rcrtc, DSSR);
+	rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK);
+
+	if (status & DSSR_VBK) {
+		drm_handle_vblank(rcrtc->crtc.dev, rcrtc->index);
+		rcar_du_crtc_finish_page_flip(rcrtc);
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
 static int rcar_du_crtc_page_flip(struct drm_crtc *crtc,
 				  struct drm_framebuffer *fb,
-				  struct drm_pending_vblank_event *event)
+				  struct drm_pending_vblank_event *event,
+				  uint32_t page_flip_flags)
 {
 	struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
 	struct drm_device *dev = rcrtc->crtc.dev;
@@ -549,16 +531,41 @@
 	.page_flip = rcar_du_crtc_page_flip,
 };
 
-int rcar_du_crtc_create(struct rcar_du_device *rcdu, unsigned int index)
+int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
 {
+	static const unsigned int mmio_offsets[] = {
+		DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET
+	};
+
+	struct rcar_du_device *rcdu = rgrp->dev;
+	struct platform_device *pdev = to_platform_device(rcdu->dev);
 	struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index];
 	struct drm_crtc *crtc = &rcrtc->crtc;
+	unsigned int irqflags;
+	char clk_name[5];
+	char *name;
+	int irq;
 	int ret;
 
-	rcrtc->mmio_offset = index ? DISP2_REG_OFFSET : 0;
+	/* Get the CRTC clock. */
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
+		sprintf(clk_name, "du.%u", index);
+		name = clk_name;
+	} else {
+		name = NULL;
+	}
+
+	rcrtc->clock = devm_clk_get(rcdu->dev, name);
+	if (IS_ERR(rcrtc->clock)) {
+		dev_err(rcdu->dev, "no clock for CRTC %u\n", index);
+		return PTR_ERR(rcrtc->clock);
+	}
+
+	rcrtc->group = rgrp;
+	rcrtc->mmio_offset = mmio_offsets[index];
 	rcrtc->index = index;
 	rcrtc->dpms = DRM_MODE_DPMS_OFF;
-	rcrtc->plane = &rcdu->planes.planes[index];
+	rcrtc->plane = &rgrp->planes.planes[index % 2];
 
 	rcrtc->plane->crtc = crtc;
 
@@ -568,6 +575,28 @@
 
 	drm_crtc_helper_add(crtc, &crtc_helper_funcs);
 
+	/* Register the interrupt handler. */
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
+		irq = platform_get_irq(pdev, index);
+		irqflags = 0;
+	} else {
+		irq = platform_get_irq(pdev, 0);
+		irqflags = IRQF_SHARED;
+	}
+
+	if (irq < 0) {
+		dev_err(rcdu->dev, "no IRQ for CRTC %u\n", index);
+		return ret;
+	}
+
+	ret = devm_request_irq(rcdu->dev, irq, rcar_du_crtc_irq, irqflags,
+			       dev_name(rcdu->dev), rcrtc);
+	if (ret < 0) {
+		dev_err(rcdu->dev,
+			"failed to register IRQ for CRTC %u\n", index);
+		return ret;
+	}
+
 	return 0;
 }
 
@@ -580,16 +609,3 @@
 		rcar_du_crtc_clr(rcrtc, DIER, DIER_VBE);
 	}
 }
-
-void rcar_du_crtc_irq(struct rcar_du_crtc *rcrtc)
-{
-	u32 status;
-
-	status = rcar_du_crtc_read(rcrtc, DSSR);
-	rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK);
-
-	if (status & DSSR_VBK) {
-		drm_handle_vblank(rcrtc->crtc.dev, rcrtc->index);
-		rcar_du_crtc_finish_page_flip(rcrtc);
-	}
-}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
index 2a0365b..43e7575 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
@@ -15,16 +15,18 @@
 #define __RCAR_DU_CRTC_H__
 
 #include <linux/mutex.h>
+#include <linux/platform_data/rcar-du.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 
-struct rcar_du_device;
+struct rcar_du_group;
 struct rcar_du_plane;
 
 struct rcar_du_crtc {
 	struct drm_crtc crtc;
 
+	struct clk *clock;
 	unsigned int mmio_offset;
 	unsigned int index;
 	bool started;
@@ -33,18 +35,21 @@
 	unsigned int outputs;
 	int dpms;
 
+	struct rcar_du_group *group;
 	struct rcar_du_plane *plane;
 };
 
-int rcar_du_crtc_create(struct rcar_du_device *rcdu, unsigned int index);
+#define to_rcar_crtc(c)	container_of(c, struct rcar_du_crtc, crtc)
+
+int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index);
 void rcar_du_crtc_enable_vblank(struct rcar_du_crtc *rcrtc, bool enable);
-void rcar_du_crtc_irq(struct rcar_du_crtc *rcrtc);
 void rcar_du_crtc_cancel_page_flip(struct rcar_du_crtc *rcrtc,
 				   struct drm_file *file);
 void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc);
 void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc);
 
-void rcar_du_crtc_route_output(struct drm_crtc *crtc, unsigned int output);
+void rcar_du_crtc_route_output(struct drm_crtc *crtc,
+			       enum rcar_du_output output);
 void rcar_du_crtc_update_planes(struct drm_crtc *crtc);
 
 #endif /* __RCAR_DU_CRTC_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index dc0fe09..0023f97 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -21,6 +21,7 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 
 #include "rcar_du_crtc.h"
@@ -29,74 +30,21 @@
 #include "rcar_du_regs.h"
 
 /* -----------------------------------------------------------------------------
- * Core device operations
- */
-
-/*
- * rcar_du_get - Acquire a reference to the DU
- *
- * Acquiring a reference enables the device clock and setup core registers. A
- * reference must be held before accessing any hardware registers.
- *
- * This function must be called with the DRM mode_config lock held.
- *
- * Return 0 in case of success or a negative error code otherwise.
- */
-int rcar_du_get(struct rcar_du_device *rcdu)
-{
-	int ret;
-
-	if (rcdu->use_count)
-		goto done;
-
-	/* Enable clocks before accessing the hardware. */
-	ret = clk_prepare_enable(rcdu->clock);
-	if (ret < 0)
-		return ret;
-
-	/* Enable extended features */
-	rcar_du_write(rcdu, DEFR, DEFR_CODE | DEFR_DEFE);
-	rcar_du_write(rcdu, DEFR2, DEFR2_CODE | DEFR2_DEFE2G);
-	rcar_du_write(rcdu, DEFR3, DEFR3_CODE | DEFR3_DEFE3);
-	rcar_du_write(rcdu, DEFR4, DEFR4_CODE);
-	rcar_du_write(rcdu, DEFR5, DEFR5_CODE | DEFR5_DEFE5);
-
-	/* Use DS1PR and DS2PR to configure planes priorities and connects the
-	 * superposition 0 to DU0 pins. DU1 pins will be configured dynamically.
-	 */
-	rcar_du_write(rcdu, DORCR, DORCR_PG1D_DS1 | DORCR_DPRS);
-
-done:
-	rcdu->use_count++;
-	return 0;
-}
-
-/*
- * rcar_du_put - Release a reference to the DU
- *
- * Releasing the last reference disables the device clock.
- *
- * This function must be called with the DRM mode_config lock held.
- */
-void rcar_du_put(struct rcar_du_device *rcdu)
-{
-	if (--rcdu->use_count)
-		return;
-
-	clk_disable_unprepare(rcdu->clock);
-}
-
-/* -----------------------------------------------------------------------------
  * DRM operations
  */
 
 static int rcar_du_unload(struct drm_device *dev)
 {
+	struct rcar_du_device *rcdu = dev->dev_private;
+
+	if (rcdu->fbdev)
+		drm_fbdev_cma_fini(rcdu->fbdev);
+
 	drm_kms_helper_poll_fini(dev);
 	drm_mode_config_cleanup(dev);
 	drm_vblank_cleanup(dev);
-	drm_irq_uninstall(dev);
 
+	dev->irq_enabled = 0;
 	dev->dev_private = NULL;
 
 	return 0;
@@ -107,7 +55,6 @@
 	struct platform_device *pdev = dev->platformdev;
 	struct rcar_du_platform_data *pdata = pdev->dev.platform_data;
 	struct rcar_du_device *rcdu;
-	struct resource *ioarea;
 	struct resource *mem;
 	int ret;
 
@@ -124,35 +71,15 @@
 
 	rcdu->dev = &pdev->dev;
 	rcdu->pdata = pdata;
+	rcdu->info = (struct rcar_du_device_info *)pdev->id_entry->driver_data;
 	rcdu->ddev = dev;
 	dev->dev_private = rcdu;
 
-	/* I/O resources and clocks */
+	/* I/O resources */
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (mem == NULL) {
-		dev_err(&pdev->dev, "failed to get memory resource\n");
-		return -EINVAL;
-	}
-
-	ioarea = devm_request_mem_region(&pdev->dev, mem->start,
-					 resource_size(mem), pdev->name);
-	if (ioarea == NULL) {
-		dev_err(&pdev->dev, "failed to request memory region\n");
-		return -EBUSY;
-	}
-
-	rcdu->mmio = devm_ioremap_nocache(&pdev->dev, ioarea->start,
-					  resource_size(ioarea));
-	if (rcdu->mmio == NULL) {
-		dev_err(&pdev->dev, "failed to remap memory resource\n");
-		return -ENOMEM;
-	}
-
-	rcdu->clock = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(rcdu->clock)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
-		return -ENOENT;
-	}
+	rcdu->mmio = devm_ioremap_resource(&pdev->dev, mem);
+	if (IS_ERR(rcdu->mmio))
+		return PTR_ERR(rcdu->mmio);
 
 	/* DRM/KMS objects */
 	ret = rcar_du_modeset_init(rcdu);
@@ -161,18 +88,14 @@
 		goto done;
 	}
 
-	/* IRQ and vblank handling */
+	/* vblank handling */
 	ret = drm_vblank_init(dev, (1 << rcdu->num_crtcs) - 1);
 	if (ret < 0) {
 		dev_err(&pdev->dev, "failed to initialize vblank\n");
 		goto done;
 	}
 
-	ret = drm_irq_install(dev);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "failed to install IRQ handler\n");
-		goto done;
-	}
+	dev->irq_enabled = 1;
 
 	platform_set_drvdata(pdev, rcdu);
 
@@ -188,20 +111,15 @@
 	struct rcar_du_device *rcdu = dev->dev_private;
 	unsigned int i;
 
-	for (i = 0; i < ARRAY_SIZE(rcdu->crtcs); ++i)
+	for (i = 0; i < rcdu->num_crtcs; ++i)
 		rcar_du_crtc_cancel_page_flip(&rcdu->crtcs[i], file);
 }
 
-static irqreturn_t rcar_du_irq(int irq, void *arg)
+static void rcar_du_lastclose(struct drm_device *dev)
 {
-	struct drm_device *dev = arg;
 	struct rcar_du_device *rcdu = dev->dev_private;
-	unsigned int i;
 
-	for (i = 0; i < ARRAY_SIZE(rcdu->crtcs); ++i)
-		rcar_du_crtc_irq(&rcdu->crtcs[i]);
-
-	return IRQ_HANDLED;
+	drm_fbdev_cma_restore_mode(rcdu->fbdev);
 }
 
 static int rcar_du_enable_vblank(struct drm_device *dev, int crtc)
@@ -230,18 +148,16 @@
 #endif
 	.poll		= drm_poll,
 	.read		= drm_read,
-	.fasync		= drm_fasync,
 	.llseek		= no_llseek,
 	.mmap		= drm_gem_cma_mmap,
 };
 
 static struct drm_driver rcar_du_driver = {
-	.driver_features	= DRIVER_HAVE_IRQ | DRIVER_GEM | DRIVER_MODESET
-				| DRIVER_PRIME,
+	.driver_features	= DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME,
 	.load			= rcar_du_load,
 	.unload			= rcar_du_unload,
 	.preclose		= rcar_du_preclose,
-	.irq_handler		= rcar_du_irq,
+	.lastclose		= rcar_du_lastclose,
 	.get_vblank_counter	= drm_vblank_count,
 	.enable_vblank		= rcar_du_enable_vblank,
 	.disable_vblank		= rcar_du_disable_vblank,
@@ -258,7 +174,7 @@
 	.gem_prime_mmap		= drm_gem_cma_prime_mmap,
 	.dumb_create		= rcar_du_dumb_create,
 	.dumb_map_offset	= drm_gem_cma_dumb_map_offset,
-	.dumb_destroy		= drm_gem_cma_dumb_destroy,
+	.dumb_destroy		= drm_gem_dumb_destroy,
 	.fops			= &rcar_du_fops,
 	.name			= "rcar-du",
 	.desc			= "Renesas R-Car Display Unit",
@@ -313,6 +229,57 @@
 	return 0;
 }
 
+static const struct rcar_du_device_info rcar_du_r8a7779_info = {
+	.features = 0,
+	.num_crtcs = 2,
+	.routes = {
+		/* R8A7779 has two RGB outputs and one (currently unsupported)
+		 * TCON output.
+		 */
+		[RCAR_DU_OUTPUT_DPAD0] = {
+			.possible_crtcs = BIT(0),
+			.encoder_type = DRM_MODE_ENCODER_NONE,
+		},
+		[RCAR_DU_OUTPUT_DPAD1] = {
+			.possible_crtcs = BIT(1) | BIT(0),
+			.encoder_type = DRM_MODE_ENCODER_NONE,
+		},
+	},
+	.num_lvds = 0,
+};
+
+static const struct rcar_du_device_info rcar_du_r8a7790_info = {
+	.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_ALIGN_128B
+		  | RCAR_DU_FEATURE_DEFR8,
+	.num_crtcs = 3,
+	.routes = {
+		/* R8A7790 has one RGB output, two LVDS outputs and one
+		 * (currently unsupported) TCON output.
+		 */
+		[RCAR_DU_OUTPUT_DPAD0] = {
+			.possible_crtcs = BIT(2) | BIT(1) | BIT(0),
+			.encoder_type = DRM_MODE_ENCODER_NONE,
+		},
+		[RCAR_DU_OUTPUT_LVDS0] = {
+			.possible_crtcs = BIT(0),
+			.encoder_type = DRM_MODE_ENCODER_LVDS,
+		},
+		[RCAR_DU_OUTPUT_LVDS1] = {
+			.possible_crtcs = BIT(2) | BIT(1),
+			.encoder_type = DRM_MODE_ENCODER_LVDS,
+		},
+	},
+	.num_lvds = 2,
+};
+
+static const struct platform_device_id rcar_du_id_table[] = {
+	{ "rcar-du-r8a7779", (kernel_ulong_t)&rcar_du_r8a7779_info },
+	{ "rcar-du-r8a7790", (kernel_ulong_t)&rcar_du_r8a7790_info },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(platform, rcar_du_id_table);
+
 static struct platform_driver rcar_du_platform_driver = {
 	.probe		= rcar_du_probe,
 	.remove		= rcar_du_remove,
@@ -321,6 +288,7 @@
 		.name	= "rcar-du",
 		.pm	= &rcar_du_pm_ops,
 	},
+	.id_table	= rcar_du_id_table,
 };
 
 module_platform_driver(rcar_du_platform_driver);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
index 193cc59..65d2d63 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
@@ -15,43 +15,74 @@
 #define __RCAR_DU_DRV_H__
 
 #include <linux/kernel.h>
-#include <linux/mutex.h>
 #include <linux/platform_data/rcar-du.h>
 
 #include "rcar_du_crtc.h"
-#include "rcar_du_plane.h"
+#include "rcar_du_group.h"
 
 struct clk;
 struct device;
 struct drm_device;
+struct drm_fbdev_cma;
+struct rcar_du_device;
+struct rcar_du_lvdsenc;
+
+#define RCAR_DU_FEATURE_CRTC_IRQ_CLOCK	(1 << 0)	/* Per-CRTC IRQ and clock */
+#define RCAR_DU_FEATURE_ALIGN_128B	(1 << 1)	/* Align pitches to 128 bytes */
+#define RCAR_DU_FEATURE_DEFR8		(1 << 2)	/* Has DEFR8 register */
+
+/*
+ * struct rcar_du_output_routing - Output routing specification
+ * @possible_crtcs: bitmask of possible CRTCs for the output
+ * @encoder_type: DRM type of the internal encoder associated with the output
+ *
+ * The DU has 5 possible outputs (DPAD0/1, LVDS0/1, TCON). Output routing data
+ * specify the valid SoC outputs, which CRTCs can drive the output, and the type
+ * of in-SoC encoder for the output.
+ */
+struct rcar_du_output_routing {
+	unsigned int possible_crtcs;
+	unsigned int encoder_type;
+};
+
+/*
+ * struct rcar_du_device_info - DU model-specific information
+ * @features: device features (RCAR_DU_FEATURE_*)
+ * @num_crtcs: total number of CRTCs
+ * @routes: array of CRTC to output routes, indexed by output (RCAR_DU_OUTPUT_*)
+ * @num_lvds: number of internal LVDS encoders
+ */
+struct rcar_du_device_info {
+	unsigned int features;
+	unsigned int num_crtcs;
+	struct rcar_du_output_routing routes[RCAR_DU_OUTPUT_MAX];
+	unsigned int num_lvds;
+};
 
 struct rcar_du_device {
 	struct device *dev;
 	const struct rcar_du_platform_data *pdata;
+	const struct rcar_du_device_info *info;
 
 	void __iomem *mmio;
-	struct clk *clock;
-	unsigned int use_count;
 
 	struct drm_device *ddev;
+	struct drm_fbdev_cma *fbdev;
 
-	struct rcar_du_crtc crtcs[2];
-	unsigned int used_crtcs;
+	struct rcar_du_crtc crtcs[3];
 	unsigned int num_crtcs;
 
-	struct {
-		struct rcar_du_plane planes[RCAR_DU_NUM_SW_PLANES];
-		unsigned int free;
-		struct mutex lock;
+	struct rcar_du_group groups[2];
 
-		struct drm_property *alpha;
-		struct drm_property *colorkey;
-		struct drm_property *zpos;
-	} planes;
+	unsigned int dpad0_source;
+	struct rcar_du_lvdsenc *lvds[2];
 };
 
-int rcar_du_get(struct rcar_du_device *rcdu);
-void rcar_du_put(struct rcar_du_device *rcdu);
+static inline bool rcar_du_has(struct rcar_du_device *rcdu,
+			       unsigned int feature)
+{
+	return rcdu->info->features & feature;
+}
 
 static inline u32 rcar_du_read(struct rcar_du_device *rcdu, u32 reg)
 {
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
new file mode 100644
index 0000000..3daa7a1
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
@@ -0,0 +1,202 @@
+/*
+ * rcar_du_encoder.c  --  R-Car Display Unit Encoder
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/export.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "rcar_du_drv.h"
+#include "rcar_du_encoder.h"
+#include "rcar_du_kms.h"
+#include "rcar_du_lvdscon.h"
+#include "rcar_du_lvdsenc.h"
+#include "rcar_du_vgacon.h"
+
+/* -----------------------------------------------------------------------------
+ * Common connector functions
+ */
+
+struct drm_encoder *
+rcar_du_connector_best_encoder(struct drm_connector *connector)
+{
+	struct rcar_du_connector *rcon = to_rcar_connector(connector);
+
+	return &rcon->encoder->encoder;
+}
+
+/* -----------------------------------------------------------------------------
+ * Encoder
+ */
+
+static void rcar_du_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct rcar_du_encoder *renc = to_rcar_encoder(encoder);
+
+	if (renc->lvds)
+		rcar_du_lvdsenc_dpms(renc->lvds, encoder->crtc, mode);
+}
+
+static bool rcar_du_encoder_mode_fixup(struct drm_encoder *encoder,
+				       const struct drm_display_mode *mode,
+				       struct drm_display_mode *adjusted_mode)
+{
+	struct rcar_du_encoder *renc = to_rcar_encoder(encoder);
+	const struct drm_display_mode *panel_mode;
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector;
+	bool found = false;
+
+	/* DAC encoders have currently no restriction on the mode. */
+	if (encoder->encoder_type == DRM_MODE_ENCODER_DAC)
+		return true;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			found = true;
+			break;
+		}
+	}
+
+	if (!found) {
+		dev_dbg(dev->dev, "mode_fixup: no connector found\n");
+		return false;
+	}
+
+	if (list_empty(&connector->modes)) {
+		dev_dbg(dev->dev, "mode_fixup: empty modes list\n");
+		return false;
+	}
+
+	panel_mode = list_first_entry(&connector->modes,
+				      struct drm_display_mode, head);
+
+	/* We're not allowed to modify the resolution. */
+	if (mode->hdisplay != panel_mode->hdisplay ||
+	    mode->vdisplay != panel_mode->vdisplay)
+		return false;
+
+	/* The flat panel mode is fixed, just copy it to the adjusted mode. */
+	drm_mode_copy(adjusted_mode, panel_mode);
+
+	/* The internal LVDS encoder has a clock frequency operating range of
+	 * 30MHz to 150MHz. Clamp the clock accordingly.
+	 */
+	if (renc->lvds)
+		adjusted_mode->clock = clamp(adjusted_mode->clock,
+					     30000, 150000);
+
+	return true;
+}
+
+static void rcar_du_encoder_mode_prepare(struct drm_encoder *encoder)
+{
+	struct rcar_du_encoder *renc = to_rcar_encoder(encoder);
+
+	if (renc->lvds)
+		rcar_du_lvdsenc_dpms(renc->lvds, encoder->crtc,
+				     DRM_MODE_DPMS_OFF);
+}
+
+static void rcar_du_encoder_mode_commit(struct drm_encoder *encoder)
+{
+	struct rcar_du_encoder *renc = to_rcar_encoder(encoder);
+
+	if (renc->lvds)
+		rcar_du_lvdsenc_dpms(renc->lvds, encoder->crtc,
+				     DRM_MODE_DPMS_ON);
+}
+
+static void rcar_du_encoder_mode_set(struct drm_encoder *encoder,
+				     struct drm_display_mode *mode,
+				     struct drm_display_mode *adjusted_mode)
+{
+	struct rcar_du_encoder *renc = to_rcar_encoder(encoder);
+
+	rcar_du_crtc_route_output(encoder->crtc, renc->output);
+}
+
+static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
+	.dpms = rcar_du_encoder_dpms,
+	.mode_fixup = rcar_du_encoder_mode_fixup,
+	.prepare = rcar_du_encoder_mode_prepare,
+	.commit = rcar_du_encoder_mode_commit,
+	.mode_set = rcar_du_encoder_mode_set,
+};
+
+static const struct drm_encoder_funcs encoder_funcs = {
+	.destroy = drm_encoder_cleanup,
+};
+
+int rcar_du_encoder_init(struct rcar_du_device *rcdu,
+			 enum rcar_du_encoder_type type,
+			 enum rcar_du_output output,
+			 const struct rcar_du_encoder_data *data)
+{
+	struct rcar_du_encoder *renc;
+	unsigned int encoder_type;
+	int ret;
+
+	renc = devm_kzalloc(rcdu->dev, sizeof(*renc), GFP_KERNEL);
+	if (renc == NULL)
+		return -ENOMEM;
+
+	renc->output = output;
+
+	switch (output) {
+	case RCAR_DU_OUTPUT_LVDS0:
+		renc->lvds = rcdu->lvds[0];
+		break;
+
+	case RCAR_DU_OUTPUT_LVDS1:
+		renc->lvds = rcdu->lvds[1];
+		break;
+
+	default:
+		break;
+	}
+
+	switch (type) {
+	case RCAR_DU_ENCODER_VGA:
+		encoder_type = DRM_MODE_ENCODER_DAC;
+		break;
+	case RCAR_DU_ENCODER_LVDS:
+		encoder_type = DRM_MODE_ENCODER_LVDS;
+		break;
+	case RCAR_DU_ENCODER_NONE:
+	default:
+		/* No external encoder, use the internal encoder type. */
+		encoder_type = rcdu->info->routes[output].encoder_type;
+		break;
+	}
+
+	ret = drm_encoder_init(rcdu->ddev, &renc->encoder, &encoder_funcs,
+			       encoder_type);
+	if (ret < 0)
+		return ret;
+
+	drm_encoder_helper_add(&renc->encoder, &encoder_helper_funcs);
+
+	switch (encoder_type) {
+	case DRM_MODE_ENCODER_LVDS:
+		return rcar_du_lvds_connector_init(rcdu, renc,
+						   &data->connector.lvds.panel);
+
+	case DRM_MODE_ENCODER_DAC:
+		return rcar_du_vga_connector_init(rcdu, renc);
+
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.h b/drivers/gpu/drm/rcar-du/rcar_du_encoder.h
new file mode 100644
index 0000000..0e5a65e
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.h
@@ -0,0 +1,49 @@
+/*
+ * rcar_du_encoder.h  --  R-Car Display Unit Encoder
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __RCAR_DU_ENCODER_H__
+#define __RCAR_DU_ENCODER_H__
+
+#include <linux/platform_data/rcar-du.h>
+
+#include <drm/drm_crtc.h>
+
+struct rcar_du_device;
+struct rcar_du_lvdsenc;
+
+struct rcar_du_encoder {
+	struct drm_encoder encoder;
+	enum rcar_du_output output;
+	struct rcar_du_lvdsenc *lvds;
+};
+
+#define to_rcar_encoder(e) \
+	container_of(e, struct rcar_du_encoder, encoder)
+
+struct rcar_du_connector {
+	struct drm_connector connector;
+	struct rcar_du_encoder *encoder;
+};
+
+#define to_rcar_connector(c) \
+	container_of(c, struct rcar_du_connector, connector)
+
+struct drm_encoder *
+rcar_du_connector_best_encoder(struct drm_connector *connector);
+
+int rcar_du_encoder_init(struct rcar_du_device *rcdu,
+			 enum rcar_du_encoder_type type,
+			 enum rcar_du_output output,
+			 const struct rcar_du_encoder_data *data);
+
+#endif /* __RCAR_DU_ENCODER_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c
new file mode 100644
index 0000000..eb53cd9
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c
@@ -0,0 +1,187 @@
+/*
+ * rcar_du_group.c  --  R-Car Display Unit Channels Pair
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+/*
+ * The R8A7779 DU is split in per-CRTC resources (scan-out engine, blending
+ * unit, timings generator, ...) and device-global resources (start/stop
+ * control, planes, ...) shared between the two CRTCs.
+ *
+ * The R8A7790 introduced a third CRTC with its own set of global resources.
+ * This would be modeled as two separate DU device instances if it wasn't for
+ * a handful or resources that are shared between the three CRTCs (mostly
+ * related to input and output routing). For this reason the R8A7790 DU must be
+ * modeled as a single device with three CRTCs, two sets of "semi-global"
+ * resources, and a few device-global resources.
+ *
+ * The rcar_du_group object is a driver specific object, without any real
+ * counterpart in the DU documentation, that models those semi-global resources.
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+
+#include "rcar_du_drv.h"
+#include "rcar_du_group.h"
+#include "rcar_du_regs.h"
+
+u32 rcar_du_group_read(struct rcar_du_group *rgrp, u32 reg)
+{
+	return rcar_du_read(rgrp->dev, rgrp->mmio_offset + reg);
+}
+
+void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data)
+{
+	rcar_du_write(rgrp->dev, rgrp->mmio_offset + reg, data);
+}
+
+static void rcar_du_group_setup_defr8(struct rcar_du_group *rgrp)
+{
+	u32 defr8 = DEFR8_CODE | DEFR8_DEFE8;
+
+	if (!rcar_du_has(rgrp->dev, RCAR_DU_FEATURE_DEFR8))
+		return;
+
+	/* The DEFR8 register for the first group also controls RGB output
+	 * routing to DPAD0
+	 */
+	if (rgrp->index == 0)
+		defr8 |= DEFR8_DRGBS_DU(rgrp->dev->dpad0_source);
+
+	rcar_du_group_write(rgrp, DEFR8, defr8);
+}
+
+static void rcar_du_group_setup(struct rcar_du_group *rgrp)
+{
+	/* Enable extended features */
+	rcar_du_group_write(rgrp, DEFR, DEFR_CODE | DEFR_DEFE);
+	rcar_du_group_write(rgrp, DEFR2, DEFR2_CODE | DEFR2_DEFE2G);
+	rcar_du_group_write(rgrp, DEFR3, DEFR3_CODE | DEFR3_DEFE3);
+	rcar_du_group_write(rgrp, DEFR4, DEFR4_CODE);
+	rcar_du_group_write(rgrp, DEFR5, DEFR5_CODE | DEFR5_DEFE5);
+
+	rcar_du_group_setup_defr8(rgrp);
+
+	/* Use DS1PR and DS2PR to configure planes priorities and connects the
+	 * superposition 0 to DU0 pins. DU1 pins will be configured dynamically.
+	 */
+	rcar_du_group_write(rgrp, DORCR, DORCR_PG1D_DS1 | DORCR_DPRS);
+}
+
+/*
+ * rcar_du_group_get - Acquire a reference to the DU channels group
+ *
+ * Acquiring the first reference setups core registers. A reference must be held
+ * before accessing any hardware registers.
+ *
+ * This function must be called with the DRM mode_config lock held.
+ *
+ * Return 0 in case of success or a negative error code otherwise.
+ */
+int rcar_du_group_get(struct rcar_du_group *rgrp)
+{
+	if (rgrp->use_count)
+		goto done;
+
+	rcar_du_group_setup(rgrp);
+
+done:
+	rgrp->use_count++;
+	return 0;
+}
+
+/*
+ * rcar_du_group_put - Release a reference to the DU
+ *
+ * This function must be called with the DRM mode_config lock held.
+ */
+void rcar_du_group_put(struct rcar_du_group *rgrp)
+{
+	--rgrp->use_count;
+}
+
+static void __rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start)
+{
+	rcar_du_group_write(rgrp, DSYSR,
+		(rcar_du_group_read(rgrp, DSYSR) & ~(DSYSR_DRES | DSYSR_DEN)) |
+		(start ? DSYSR_DEN : DSYSR_DRES));
+}
+
+void rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start)
+{
+	/* Many of the configuration bits are only updated when the display
+	 * reset (DRES) bit in DSYSR is set to 1, disabling *both* CRTCs. Some
+	 * of those bits could be pre-configured, but others (especially the
+	 * bits related to plane assignment to display timing controllers) need
+	 * to be modified at runtime.
+	 *
+	 * Restart the display controller if a start is requested. Sorry for the
+	 * flicker. It should be possible to move most of the "DRES-update" bits
+	 * setup to driver initialization time and minimize the number of cases
+	 * when the display controller will have to be restarted.
+	 */
+	if (start) {
+		if (rgrp->used_crtcs++ != 0)
+			__rcar_du_group_start_stop(rgrp, false);
+		__rcar_du_group_start_stop(rgrp, true);
+	} else {
+		if (--rgrp->used_crtcs == 0)
+			__rcar_du_group_start_stop(rgrp, false);
+	}
+}
+
+void rcar_du_group_restart(struct rcar_du_group *rgrp)
+{
+	__rcar_du_group_start_stop(rgrp, false);
+	__rcar_du_group_start_stop(rgrp, true);
+}
+
+static int rcar_du_set_dpad0_routing(struct rcar_du_device *rcdu)
+{
+	int ret;
+
+	/* RGB output routing to DPAD0 is configured in the DEFR8 register of
+	 * the first group. As this function can be called with the DU0 and DU1
+	 * CRTCs disabled, we need to enable the first group clock before
+	 * accessing the register.
+	 */
+	ret = clk_prepare_enable(rcdu->crtcs[0].clock);
+	if (ret < 0)
+		return ret;
+
+	rcar_du_group_setup_defr8(&rcdu->groups[0]);
+
+	clk_disable_unprepare(rcdu->crtcs[0].clock);
+
+	return 0;
+}
+
+int rcar_du_group_set_routing(struct rcar_du_group *rgrp)
+{
+	struct rcar_du_crtc *crtc0 = &rgrp->dev->crtcs[rgrp->index * 2];
+	u32 dorcr = rcar_du_group_read(rgrp, DORCR);
+
+	dorcr &= ~(DORCR_PG2T | DORCR_DK2S | DORCR_PG2D_MASK);
+
+	/* Set the DPAD1 pins sources. Select CRTC 0 if explicitly requested and
+	 * CRTC 1 in all other cases to avoid cloning CRTC 0 to DPAD0 and DPAD1
+	 * by default.
+	 */
+	if (crtc0->outputs & BIT(RCAR_DU_OUTPUT_DPAD1))
+		dorcr |= DORCR_PG2D_DS1;
+	else
+		dorcr |= DORCR_PG2T | DORCR_DK2S | DORCR_PG2D_DS2;
+
+	rcar_du_group_write(rgrp, DORCR, dorcr);
+
+	return rcar_du_set_dpad0_routing(rgrp->dev);
+}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h
new file mode 100644
index 0000000..5025930
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h
@@ -0,0 +1,50 @@
+/*
+ * rcar_du_group.c  --  R-Car Display Unit Planes and CRTCs Group
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __RCAR_DU_GROUP_H__
+#define __RCAR_DU_GROUP_H__
+
+#include "rcar_du_plane.h"
+
+struct rcar_du_device;
+
+/*
+ * struct rcar_du_group - CRTCs and planes group
+ * @dev: the DU device
+ * @mmio_offset: registers offset in the device memory map
+ * @index: group index
+ * @use_count: number of users of the group (rcar_du_group_(get|put))
+ * @used_crtcs: number of CRTCs currently in use
+ * @planes: planes handled by the group
+ */
+struct rcar_du_group {
+	struct rcar_du_device *dev;
+	unsigned int mmio_offset;
+	unsigned int index;
+
+	unsigned int use_count;
+	unsigned int used_crtcs;
+
+	struct rcar_du_planes planes;
+};
+
+u32 rcar_du_group_read(struct rcar_du_group *rgrp, u32 reg);
+void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data);
+
+int rcar_du_group_get(struct rcar_du_group *rgrp);
+void rcar_du_group_put(struct rcar_du_group *rgrp);
+void rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start);
+void rcar_du_group_restart(struct rcar_du_group *rgrp);
+int rcar_du_group_set_routing(struct rcar_du_group *rgrp);
+
+#endif /* __RCAR_DU_GROUP_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index d30c2e2..b31ac08 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -19,10 +19,10 @@
 
 #include "rcar_du_crtc.h"
 #include "rcar_du_drv.h"
+#include "rcar_du_encoder.h"
 #include "rcar_du_kms.h"
-#include "rcar_du_lvds.h"
+#include "rcar_du_lvdsenc.h"
 #include "rcar_du_regs.h"
-#include "rcar_du_vga.h"
 
 /* -----------------------------------------------------------------------------
  * Format helpers
@@ -106,46 +106,24 @@
 }
 
 /* -----------------------------------------------------------------------------
- * Common connector and encoder functions
- */
-
-struct drm_encoder *
-rcar_du_connector_best_encoder(struct drm_connector *connector)
-{
-	struct rcar_du_connector *rcon = to_rcar_connector(connector);
-
-	return &rcon->encoder->encoder;
-}
-
-void rcar_du_encoder_mode_prepare(struct drm_encoder *encoder)
-{
-}
-
-void rcar_du_encoder_mode_set(struct drm_encoder *encoder,
-			      struct drm_display_mode *mode,
-			      struct drm_display_mode *adjusted_mode)
-{
-	struct rcar_du_encoder *renc = to_rcar_encoder(encoder);
-
-	rcar_du_crtc_route_output(encoder->crtc, renc->output);
-}
-
-void rcar_du_encoder_mode_commit(struct drm_encoder *encoder)
-{
-}
-
-/* -----------------------------------------------------------------------------
  * Frame buffer
  */
 
 int rcar_du_dumb_create(struct drm_file *file, struct drm_device *dev,
 			struct drm_mode_create_dumb *args)
 {
+	struct rcar_du_device *rcdu = dev->dev_private;
 	unsigned int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
 	unsigned int align;
 
-	/* The pitch must be aligned to a 16 pixels boundary. */
-	align = 16 * args->bpp / 8;
+	/* The R8A7779 DU requires a 16 pixels pitch alignment as documented,
+	 * but the R8A7790 DU seems to require a 128 bytes pitch alignment.
+	 */
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_ALIGN_128B))
+		align = 128;
+	else
+		align = 16 * args->bpp / 8;
+
 	args->pitch = roundup(max(args->pitch, min_pitch), align);
 
 	return drm_gem_cma_dumb_create(file, dev, args);
@@ -155,6 +133,7 @@
 rcar_du_fb_create(struct drm_device *dev, struct drm_file *file_priv,
 		  struct drm_mode_fb_cmd2 *mode_cmd)
 {
+	struct rcar_du_device *rcdu = dev->dev_private;
 	const struct rcar_du_format_info *format;
 	unsigned int align;
 
@@ -165,7 +144,10 @@
 		return ERR_PTR(-EINVAL);
 	}
 
-	align = 16 * format->bpp / 8;
+	if (rcar_du_has(rcdu, RCAR_DU_FEATURE_ALIGN_128B))
+		align = 128;
+	else
+		align = 16 * format->bpp / 8;
 
 	if (mode_cmd->pitches[0] & (align - 1) ||
 	    mode_cmd->pitches[0] >= 8192) {
@@ -185,81 +167,124 @@
 	return drm_fb_cma_create(dev, file_priv, mode_cmd);
 }
 
+static void rcar_du_output_poll_changed(struct drm_device *dev)
+{
+	struct rcar_du_device *rcdu = dev->dev_private;
+
+	drm_fbdev_cma_hotplug_event(rcdu->fbdev);
+}
+
 static const struct drm_mode_config_funcs rcar_du_mode_config_funcs = {
 	.fb_create = rcar_du_fb_create,
+	.output_poll_changed = rcar_du_output_poll_changed,
 };
 
 int rcar_du_modeset_init(struct rcar_du_device *rcdu)
 {
+	static const unsigned int mmio_offsets[] = {
+		DU0_REG_OFFSET, DU2_REG_OFFSET
+	};
+
 	struct drm_device *dev = rcdu->ddev;
 	struct drm_encoder *encoder;
+	struct drm_fbdev_cma *fbdev;
+	unsigned int num_groups;
 	unsigned int i;
 	int ret;
 
-	drm_mode_config_init(rcdu->ddev);
+	drm_mode_config_init(dev);
 
-	rcdu->ddev->mode_config.min_width = 0;
-	rcdu->ddev->mode_config.min_height = 0;
-	rcdu->ddev->mode_config.max_width = 4095;
-	rcdu->ddev->mode_config.max_height = 2047;
-	rcdu->ddev->mode_config.funcs = &rcar_du_mode_config_funcs;
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+	dev->mode_config.max_width = 4095;
+	dev->mode_config.max_height = 2047;
+	dev->mode_config.funcs = &rcar_du_mode_config_funcs;
 
-	ret = rcar_du_plane_init(rcdu);
-	if (ret < 0)
-		return ret;
+	rcdu->num_crtcs = rcdu->info->num_crtcs;
 
-	for (i = 0; i < ARRAY_SIZE(rcdu->crtcs); ++i) {
-		ret = rcar_du_crtc_create(rcdu, i);
+	/* Initialize the groups. */
+	num_groups = DIV_ROUND_UP(rcdu->num_crtcs, 2);
+
+	for (i = 0; i < num_groups; ++i) {
+		struct rcar_du_group *rgrp = &rcdu->groups[i];
+
+		rgrp->dev = rcdu;
+		rgrp->mmio_offset = mmio_offsets[i];
+		rgrp->index = i;
+
+		ret = rcar_du_planes_init(rgrp);
 		if (ret < 0)
 			return ret;
 	}
 
-	rcdu->used_crtcs = 0;
-	rcdu->num_crtcs = i;
+	/* Create the CRTCs. */
+	for (i = 0; i < rcdu->num_crtcs; ++i) {
+		struct rcar_du_group *rgrp = &rcdu->groups[i / 2];
+
+		ret = rcar_du_crtc_create(rgrp, i);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* Initialize the encoders. */
+	ret = rcar_du_lvdsenc_init(rcdu);
+	if (ret < 0)
+		return ret;
 
 	for (i = 0; i < rcdu->pdata->num_encoders; ++i) {
 		const struct rcar_du_encoder_data *pdata =
 			&rcdu->pdata->encoders[i];
+		const struct rcar_du_output_routing *route =
+			&rcdu->info->routes[pdata->output];
 
-		if (pdata->output >= ARRAY_SIZE(rcdu->crtcs)) {
+		if (pdata->type == RCAR_DU_ENCODER_UNUSED)
+			continue;
+
+		if (pdata->output >= RCAR_DU_OUTPUT_MAX ||
+		    route->possible_crtcs == 0) {
 			dev_warn(rcdu->dev,
 				 "encoder %u references unexisting output %u, skipping\n",
 				 i, pdata->output);
 			continue;
 		}
 
-		switch (pdata->encoder) {
-		case RCAR_DU_ENCODER_VGA:
-			rcar_du_vga_init(rcdu, &pdata->u.vga, pdata->output);
-			break;
-
-		case RCAR_DU_ENCODER_LVDS:
-			rcar_du_lvds_init(rcdu, &pdata->u.lvds, pdata->output);
-			break;
-
-		default:
-			break;
-		}
+		rcar_du_encoder_init(rcdu, pdata->type, pdata->output, pdata);
 	}
 
-	/* Set the possible CRTCs and possible clones. All encoders can be
-	 * driven by the CRTC associated with the output they're connected to,
-	 * as well as by CRTC 0.
+	/* Set the possible CRTCs and possible clones. There's always at least
+	 * one way for all encoders to clone each other, set all bits in the
+	 * possible clones field.
 	 */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		struct rcar_du_encoder *renc = to_rcar_encoder(encoder);
+		const struct rcar_du_output_routing *route =
+			&rcdu->info->routes[renc->output];
 
-		encoder->possible_crtcs = (1 << 0) | (1 << renc->output);
-		encoder->possible_clones = 1 << 0;
+		encoder->possible_crtcs = route->possible_crtcs;
+		encoder->possible_clones = (1 << rcdu->pdata->num_encoders) - 1;
 	}
 
-	ret = rcar_du_plane_register(rcdu);
-	if (ret < 0)
-		return ret;
+	/* Now that the CRTCs have been initialized register the planes. */
+	for (i = 0; i < num_groups; ++i) {
+		ret = rcar_du_planes_register(&rcdu->groups[i]);
+		if (ret < 0)
+			return ret;
+	}
 
-	drm_kms_helper_poll_init(rcdu->ddev);
+	drm_kms_helper_poll_init(dev);
 
-	drm_helper_disable_unused_functions(rcdu->ddev);
+	drm_helper_disable_unused_functions(dev);
+
+	fbdev = drm_fbdev_cma_init(dev, 32, dev->mode_config.num_crtc,
+				   dev->mode_config.num_connector);
+	if (IS_ERR(fbdev))
+		return PTR_ERR(fbdev);
+
+#ifndef CONFIG_FRAMEBUFFER_CONSOLE
+	drm_fbdev_cma_restore_mode(fbdev);
+#endif
+
+	rcdu->fbdev = fbdev;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.h b/drivers/gpu/drm/rcar-du/rcar_du_kms.h
index dba4722..5750e6a 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.h
@@ -16,8 +16,9 @@
 
 #include <linux/types.h>
 
-#include <drm/drm_crtc.h>
-
+struct drm_file;
+struct drm_device;
+struct drm_mode_create_dumb;
 struct rcar_du_device;
 
 struct rcar_du_format_info {
@@ -28,32 +29,8 @@
 	unsigned int edf;
 };
 
-struct rcar_du_encoder {
-	struct drm_encoder encoder;
-	unsigned int output;
-};
-
-#define to_rcar_encoder(e) \
-	container_of(e, struct rcar_du_encoder, encoder)
-
-struct rcar_du_connector {
-	struct drm_connector connector;
-	struct rcar_du_encoder *encoder;
-};
-
-#define to_rcar_connector(c) \
-	container_of(c, struct rcar_du_connector, connector)
-
 const struct rcar_du_format_info *rcar_du_format_info(u32 fourcc);
 
-struct drm_encoder *
-rcar_du_connector_best_encoder(struct drm_connector *connector);
-void rcar_du_encoder_mode_prepare(struct drm_encoder *encoder);
-void rcar_du_encoder_mode_set(struct drm_encoder *encoder,
-			      struct drm_display_mode *mode,
-			      struct drm_display_mode *adjusted_mode);
-void rcar_du_encoder_mode_commit(struct drm_encoder *encoder);
-
 int rcar_du_modeset_init(struct rcar_du_device *rcdu);
 
 int rcar_du_dumb_create(struct drm_file *file, struct drm_device *dev,
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvds.c b/drivers/gpu/drm/rcar-du/rcar_du_lvds.c
deleted file mode 100644
index 7aefe72..0000000
--- a/drivers/gpu/drm/rcar-du/rcar_du_lvds.c
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * rcar_du_lvds.c  --  R-Car Display Unit LVDS Encoder and Connector
- *
- * Copyright (C) 2013 Renesas Corporation
- *
- * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
-
-#include "rcar_du_drv.h"
-#include "rcar_du_kms.h"
-#include "rcar_du_lvds.h"
-
-struct rcar_du_lvds_connector {
-	struct rcar_du_connector connector;
-
-	const struct rcar_du_panel_data *panel;
-};
-
-#define to_rcar_lvds_connector(c) \
-	container_of(c, struct rcar_du_lvds_connector, connector.connector)
-
-/* -----------------------------------------------------------------------------
- * Connector
- */
-
-static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector)
-{
-	struct rcar_du_lvds_connector *lvdscon = to_rcar_lvds_connector(connector);
-	struct drm_display_mode *mode;
-
-	mode = drm_mode_create(connector->dev);
-	if (mode == NULL)
-		return 0;
-
-	mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
-	mode->clock = lvdscon->panel->mode.clock;
-	mode->hdisplay = lvdscon->panel->mode.hdisplay;
-	mode->hsync_start = lvdscon->panel->mode.hsync_start;
-	mode->hsync_end = lvdscon->panel->mode.hsync_end;
-	mode->htotal = lvdscon->panel->mode.htotal;
-	mode->vdisplay = lvdscon->panel->mode.vdisplay;
-	mode->vsync_start = lvdscon->panel->mode.vsync_start;
-	mode->vsync_end = lvdscon->panel->mode.vsync_end;
-	mode->vtotal = lvdscon->panel->mode.vtotal;
-	mode->flags = lvdscon->panel->mode.flags;
-
-	drm_mode_set_name(mode);
-	drm_mode_probed_add(connector, mode);
-
-	return 1;
-}
-
-static int rcar_du_lvds_connector_mode_valid(struct drm_connector *connector,
-					    struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
-static const struct drm_connector_helper_funcs connector_helper_funcs = {
-	.get_modes = rcar_du_lvds_connector_get_modes,
-	.mode_valid = rcar_du_lvds_connector_mode_valid,
-	.best_encoder = rcar_du_connector_best_encoder,
-};
-
-static void rcar_du_lvds_connector_destroy(struct drm_connector *connector)
-{
-	drm_sysfs_connector_remove(connector);
-	drm_connector_cleanup(connector);
-}
-
-static enum drm_connector_status
-rcar_du_lvds_connector_detect(struct drm_connector *connector, bool force)
-{
-	return connector_status_connected;
-}
-
-static const struct drm_connector_funcs connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
-	.detect = rcar_du_lvds_connector_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = rcar_du_lvds_connector_destroy,
-};
-
-static int rcar_du_lvds_connector_init(struct rcar_du_device *rcdu,
-				       struct rcar_du_encoder *renc,
-				       const struct rcar_du_panel_data *panel)
-{
-	struct rcar_du_lvds_connector *lvdscon;
-	struct drm_connector *connector;
-	int ret;
-
-	lvdscon = devm_kzalloc(rcdu->dev, sizeof(*lvdscon), GFP_KERNEL);
-	if (lvdscon == NULL)
-		return -ENOMEM;
-
-	lvdscon->panel = panel;
-
-	connector = &lvdscon->connector.connector;
-	connector->display_info.width_mm = panel->width_mm;
-	connector->display_info.height_mm = panel->height_mm;
-
-	ret = drm_connector_init(rcdu->ddev, connector, &connector_funcs,
-				 DRM_MODE_CONNECTOR_LVDS);
-	if (ret < 0)
-		return ret;
-
-	drm_connector_helper_add(connector, &connector_helper_funcs);
-	ret = drm_sysfs_connector_add(connector);
-	if (ret < 0)
-		return ret;
-
-	drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-	drm_object_property_set_value(&connector->base,
-		rcdu->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF);
-
-	ret = drm_mode_connector_attach_encoder(connector, &renc->encoder);
-	if (ret < 0)
-		return ret;
-
-	connector->encoder = &renc->encoder;
-	lvdscon->connector.encoder = renc;
-
-	return 0;
-}
-
-/* -----------------------------------------------------------------------------
- * Encoder
- */
-
-static void rcar_du_lvds_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-}
-
-static bool rcar_du_lvds_encoder_mode_fixup(struct drm_encoder *encoder,
-					   const struct drm_display_mode *mode,
-					   struct drm_display_mode *adjusted_mode)
-{
-	const struct drm_display_mode *panel_mode;
-	struct drm_device *dev = encoder->dev;
-	struct drm_connector *connector;
-	bool found = false;
-
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		if (connector->encoder == encoder) {
-			found = true;
-			break;
-		}
-	}
-
-	if (!found) {
-		dev_dbg(dev->dev, "mode_fixup: no connector found\n");
-		return false;
-	}
-
-	if (list_empty(&connector->modes)) {
-		dev_dbg(dev->dev, "mode_fixup: empty modes list\n");
-		return false;
-	}
-
-	panel_mode = list_first_entry(&connector->modes,
-				      struct drm_display_mode, head);
-
-	/* We're not allowed to modify the resolution. */
-	if (mode->hdisplay != panel_mode->hdisplay ||
-	    mode->vdisplay != panel_mode->vdisplay)
-		return false;
-
-	/* The flat panel mode is fixed, just copy it to the adjusted mode. */
-	drm_mode_copy(adjusted_mode, panel_mode);
-
-	return true;
-}
-
-static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
-	.dpms = rcar_du_lvds_encoder_dpms,
-	.mode_fixup = rcar_du_lvds_encoder_mode_fixup,
-	.prepare = rcar_du_encoder_mode_prepare,
-	.commit = rcar_du_encoder_mode_commit,
-	.mode_set = rcar_du_encoder_mode_set,
-};
-
-static const struct drm_encoder_funcs encoder_funcs = {
-	.destroy = drm_encoder_cleanup,
-};
-
-int rcar_du_lvds_init(struct rcar_du_device *rcdu,
-		      const struct rcar_du_encoder_lvds_data *data,
-		      unsigned int output)
-{
-	struct rcar_du_encoder *renc;
-	int ret;
-
-	renc = devm_kzalloc(rcdu->dev, sizeof(*renc), GFP_KERNEL);
-	if (renc == NULL)
-		return -ENOMEM;
-
-	renc->output = output;
-
-	ret = drm_encoder_init(rcdu->ddev, &renc->encoder, &encoder_funcs,
-			       DRM_MODE_ENCODER_LVDS);
-	if (ret < 0)
-		return ret;
-
-	drm_encoder_helper_add(&renc->encoder, &encoder_helper_funcs);
-
-	return rcar_du_lvds_connector_init(rcdu, renc, &data->panel);
-}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvds.h b/drivers/gpu/drm/rcar-du/rcar_du_lvds.h
deleted file mode 100644
index b47f832..0000000
--- a/drivers/gpu/drm/rcar-du/rcar_du_lvds.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * rcar_du_lvds.h  --  R-Car Display Unit LVDS Encoder and Connector
- *
- * Copyright (C) 2013 Renesas Corporation
- *
- * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __RCAR_DU_LVDS_H__
-#define __RCAR_DU_LVDS_H__
-
-struct rcar_du_device;
-struct rcar_du_encoder_lvds_data;
-
-int rcar_du_lvds_init(struct rcar_du_device *rcdu,
-		      const struct rcar_du_encoder_lvds_data *data,
-		      unsigned int output);
-
-#endif /* __RCAR_DU_LVDS_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
new file mode 100644
index 0000000..4f3ba93
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c
@@ -0,0 +1,131 @@
+/*
+ * rcar_du_lvdscon.c  --  R-Car Display Unit LVDS Connector
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "rcar_du_drv.h"
+#include "rcar_du_encoder.h"
+#include "rcar_du_kms.h"
+#include "rcar_du_lvdscon.h"
+
+struct rcar_du_lvds_connector {
+	struct rcar_du_connector connector;
+
+	const struct rcar_du_panel_data *panel;
+};
+
+#define to_rcar_lvds_connector(c) \
+	container_of(c, struct rcar_du_lvds_connector, connector.connector)
+
+static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector)
+{
+	struct rcar_du_lvds_connector *lvdscon =
+		to_rcar_lvds_connector(connector);
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_create(connector->dev);
+	if (mode == NULL)
+		return 0;
+
+	mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER;
+	mode->clock = lvdscon->panel->mode.clock;
+	mode->hdisplay = lvdscon->panel->mode.hdisplay;
+	mode->hsync_start = lvdscon->panel->mode.hsync_start;
+	mode->hsync_end = lvdscon->panel->mode.hsync_end;
+	mode->htotal = lvdscon->panel->mode.htotal;
+	mode->vdisplay = lvdscon->panel->mode.vdisplay;
+	mode->vsync_start = lvdscon->panel->mode.vsync_start;
+	mode->vsync_end = lvdscon->panel->mode.vsync_end;
+	mode->vtotal = lvdscon->panel->mode.vtotal;
+	mode->flags = lvdscon->panel->mode.flags;
+
+	drm_mode_set_name(mode);
+	drm_mode_probed_add(connector, mode);
+
+	return 1;
+}
+
+static int rcar_du_lvds_connector_mode_valid(struct drm_connector *connector,
+					    struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = rcar_du_lvds_connector_get_modes,
+	.mode_valid = rcar_du_lvds_connector_mode_valid,
+	.best_encoder = rcar_du_connector_best_encoder,
+};
+
+static void rcar_du_lvds_connector_destroy(struct drm_connector *connector)
+{
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+}
+
+static enum drm_connector_status
+rcar_du_lvds_connector_detect(struct drm_connector *connector, bool force)
+{
+	return connector_status_connected;
+}
+
+static const struct drm_connector_funcs connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = rcar_du_lvds_connector_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = rcar_du_lvds_connector_destroy,
+};
+
+int rcar_du_lvds_connector_init(struct rcar_du_device *rcdu,
+				struct rcar_du_encoder *renc,
+				const struct rcar_du_panel_data *panel)
+{
+	struct rcar_du_lvds_connector *lvdscon;
+	struct drm_connector *connector;
+	int ret;
+
+	lvdscon = devm_kzalloc(rcdu->dev, sizeof(*lvdscon), GFP_KERNEL);
+	if (lvdscon == NULL)
+		return -ENOMEM;
+
+	lvdscon->panel = panel;
+
+	connector = &lvdscon->connector.connector;
+	connector->display_info.width_mm = panel->width_mm;
+	connector->display_info.height_mm = panel->height_mm;
+
+	ret = drm_connector_init(rcdu->ddev, connector, &connector_funcs,
+				 DRM_MODE_CONNECTOR_LVDS);
+	if (ret < 0)
+		return ret;
+
+	drm_connector_helper_add(connector, &connector_helper_funcs);
+	ret = drm_sysfs_connector_add(connector);
+	if (ret < 0)
+		return ret;
+
+	drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+	drm_object_property_set_value(&connector->base,
+		rcdu->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF);
+
+	ret = drm_mode_connector_attach_encoder(connector, &renc->encoder);
+	if (ret < 0)
+		return ret;
+
+	connector->encoder = &renc->encoder;
+	lvdscon->connector.encoder = renc;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.h b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.h
new file mode 100644
index 0000000..bff8683
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.h
@@ -0,0 +1,25 @@
+/*
+ * rcar_du_lvdscon.h  --  R-Car Display Unit LVDS Connector
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __RCAR_DU_LVDSCON_H__
+#define __RCAR_DU_LVDSCON_H__
+
+struct rcar_du_device;
+struct rcar_du_encoder;
+struct rcar_du_panel_data;
+
+int rcar_du_lvds_connector_init(struct rcar_du_device *rcdu,
+				struct rcar_du_encoder *renc,
+				const struct rcar_du_panel_data *panel);
+
+#endif /* __RCAR_DU_LVDSCON_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.c
new file mode 100644
index 0000000..a0f6a17
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.c
@@ -0,0 +1,196 @@
+/*
+ * rcar_du_lvdsenc.c  --  R-Car Display Unit LVDS Encoder
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "rcar_du_drv.h"
+#include "rcar_du_encoder.h"
+#include "rcar_du_lvdsenc.h"
+#include "rcar_lvds_regs.h"
+
+struct rcar_du_lvdsenc {
+	struct rcar_du_device *dev;
+
+	unsigned int index;
+	void __iomem *mmio;
+	struct clk *clock;
+	int dpms;
+
+	enum rcar_lvds_input input;
+};
+
+static void rcar_lvds_write(struct rcar_du_lvdsenc *lvds, u32 reg, u32 data)
+{
+	iowrite32(data, lvds->mmio + reg);
+}
+
+static int rcar_du_lvdsenc_start(struct rcar_du_lvdsenc *lvds,
+				 struct rcar_du_crtc *rcrtc)
+{
+	const struct drm_display_mode *mode = &rcrtc->crtc.mode;
+	unsigned int freq = mode->clock;
+	u32 lvdcr0;
+	u32 pllcr;
+	int ret;
+
+	if (lvds->dpms == DRM_MODE_DPMS_ON)
+		return 0;
+
+	ret = clk_prepare_enable(lvds->clock);
+	if (ret < 0)
+		return ret;
+
+	/* PLL clock configuration */
+	if (freq <= 38000)
+		pllcr = LVDPLLCR_CEEN | LVDPLLCR_COSEL | LVDPLLCR_PLLDLYCNT_38M;
+	else if (freq <= 60000)
+		pllcr = LVDPLLCR_CEEN | LVDPLLCR_COSEL | LVDPLLCR_PLLDLYCNT_60M;
+	else if (freq <= 121000)
+		pllcr = LVDPLLCR_CEEN | LVDPLLCR_COSEL | LVDPLLCR_PLLDLYCNT_121M;
+	else
+		pllcr = LVDPLLCR_PLLDLYCNT_150M;
+
+	rcar_lvds_write(lvds, LVDPLLCR, pllcr);
+
+	/* Hardcode the channels and control signals routing for now.
+	 *
+	 * HSYNC -> CTRL0
+	 * VSYNC -> CTRL1
+	 * DISP  -> CTRL2
+	 * 0     -> CTRL3
+	 *
+	 * Channels 1 and 3 are switched on ES1.
+	 */
+	rcar_lvds_write(lvds, LVDCTRCR, LVDCTRCR_CTR3SEL_ZERO |
+			LVDCTRCR_CTR2SEL_DISP | LVDCTRCR_CTR1SEL_VSYNC |
+			LVDCTRCR_CTR0SEL_HSYNC);
+	rcar_lvds_write(lvds, LVDCHCR,
+			LVDCHCR_CHSEL_CH(0, 0) | LVDCHCR_CHSEL_CH(1, 3) |
+			LVDCHCR_CHSEL_CH(2, 2) | LVDCHCR_CHSEL_CH(3, 1));
+
+	/* Select the input, hardcode mode 0, enable LVDS operation and turn
+	 * bias circuitry on.
+	 */
+	lvdcr0 = LVDCR0_BEN | LVDCR0_LVEN;
+	if (rcrtc->index == 2)
+		lvdcr0 |= LVDCR0_DUSEL;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+
+	/* Turn all the channels on. */
+	rcar_lvds_write(lvds, LVDCR1, LVDCR1_CHSTBY(3) | LVDCR1_CHSTBY(2) |
+			LVDCR1_CHSTBY(1) | LVDCR1_CHSTBY(0) | LVDCR1_CLKSTBY);
+
+	/* Turn the PLL on, wait for the startup delay, and turn the output
+	 * on.
+	 */
+	lvdcr0 |= LVDCR0_PLLEN;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+
+	usleep_range(100, 150);
+
+	lvdcr0 |= LVDCR0_LVRES;
+	rcar_lvds_write(lvds, LVDCR0, lvdcr0);
+
+	lvds->dpms = DRM_MODE_DPMS_ON;
+	return 0;
+}
+
+static void rcar_du_lvdsenc_stop(struct rcar_du_lvdsenc *lvds)
+{
+	if (lvds->dpms == DRM_MODE_DPMS_OFF)
+		return;
+
+	rcar_lvds_write(lvds, LVDCR0, 0);
+	rcar_lvds_write(lvds, LVDCR1, 0);
+
+	clk_disable_unprepare(lvds->clock);
+
+	lvds->dpms = DRM_MODE_DPMS_OFF;
+}
+
+int rcar_du_lvdsenc_dpms(struct rcar_du_lvdsenc *lvds,
+			 struct drm_crtc *crtc, int mode)
+{
+	if (mode == DRM_MODE_DPMS_OFF) {
+		rcar_du_lvdsenc_stop(lvds);
+		return 0;
+	} else if (crtc) {
+		struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc);
+		return rcar_du_lvdsenc_start(lvds, rcrtc);
+	} else
+		return -EINVAL;
+}
+
+static int rcar_du_lvdsenc_get_resources(struct rcar_du_lvdsenc *lvds,
+					 struct platform_device *pdev)
+{
+	struct resource *mem;
+	char name[7];
+
+	sprintf(name, "lvds.%u", lvds->index);
+
+	mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
+	if (mem == NULL) {
+		dev_err(&pdev->dev, "failed to get memory resource for %s\n",
+			name);
+		return -EINVAL;
+	}
+
+	lvds->mmio = devm_ioremap_resource(&pdev->dev, mem);
+	if (lvds->mmio == NULL) {
+		dev_err(&pdev->dev, "failed to remap memory resource for %s\n",
+			name);
+		return -ENOMEM;
+	}
+
+	lvds->clock = devm_clk_get(&pdev->dev, name);
+	if (IS_ERR(lvds->clock)) {
+		dev_err(&pdev->dev, "failed to get clock for %s\n", name);
+		return PTR_ERR(lvds->clock);
+	}
+
+	return 0;
+}
+
+int rcar_du_lvdsenc_init(struct rcar_du_device *rcdu)
+{
+	struct platform_device *pdev = to_platform_device(rcdu->dev);
+	struct rcar_du_lvdsenc *lvds;
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < rcdu->info->num_lvds; ++i) {
+		lvds = devm_kzalloc(&pdev->dev, sizeof(*lvds), GFP_KERNEL);
+		if (lvds == NULL) {
+			dev_err(&pdev->dev, "failed to allocate private data\n");
+			return -ENOMEM;
+		}
+
+		lvds->dev = rcdu;
+		lvds->index = i;
+		lvds->input = i ? RCAR_LVDS_INPUT_DU1 : RCAR_LVDS_INPUT_DU0;
+		lvds->dpms = DRM_MODE_DPMS_OFF;
+
+		ret = rcar_du_lvdsenc_get_resources(lvds, pdev);
+		if (ret < 0)
+			return ret;
+
+		rcdu->lvds[i] = lvds;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.h b/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.h
new file mode 100644
index 0000000..7051c6d
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdsenc.h
@@ -0,0 +1,46 @@
+/*
+ * rcar_du_lvdsenc.h  --  R-Car Display Unit LVDS Encoder
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __RCAR_DU_LVDSENC_H__
+#define __RCAR_DU_LVDSENC_H__
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_data/rcar-du.h>
+
+struct rcar_drm_crtc;
+struct rcar_du_lvdsenc;
+
+enum rcar_lvds_input {
+	RCAR_LVDS_INPUT_DU0,
+	RCAR_LVDS_INPUT_DU1,
+	RCAR_LVDS_INPUT_DU2,
+};
+
+#if IS_ENABLED(CONFIG_DRM_RCAR_LVDS)
+int rcar_du_lvdsenc_init(struct rcar_du_device *rcdu);
+int rcar_du_lvdsenc_dpms(struct rcar_du_lvdsenc *lvds,
+			 struct drm_crtc *crtc, int mode);
+#else
+static inline int rcar_du_lvdsenc_init(struct rcar_du_device *rcdu)
+{
+	return 0;
+}
+static inline int rcar_du_lvdsenc_dpms(struct rcar_du_lvdsenc *lvds,
+				       struct drm_crtc *crtc, int mode)
+{
+	return 0;
+}
+#endif
+
+#endif /* __RCAR_DU_LVDSENC_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
index a65f81d..5300064 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c
@@ -36,90 +36,95 @@
 	return container_of(plane, struct rcar_du_kms_plane, plane)->hwplane;
 }
 
-static u32 rcar_du_plane_read(struct rcar_du_device *rcdu,
+static u32 rcar_du_plane_read(struct rcar_du_group *rgrp,
 			      unsigned int index, u32 reg)
 {
-	return rcar_du_read(rcdu, index * PLANE_OFF + reg);
+	return rcar_du_read(rgrp->dev,
+			    rgrp->mmio_offset + index * PLANE_OFF + reg);
 }
 
-static void rcar_du_plane_write(struct rcar_du_device *rcdu,
+static void rcar_du_plane_write(struct rcar_du_group *rgrp,
 				unsigned int index, u32 reg, u32 data)
 {
-	rcar_du_write(rcdu, index * PLANE_OFF + reg, data);
+	rcar_du_write(rgrp->dev, rgrp->mmio_offset + index * PLANE_OFF + reg,
+		      data);
 }
 
 int rcar_du_plane_reserve(struct rcar_du_plane *plane,
 			  const struct rcar_du_format_info *format)
 {
-	struct rcar_du_device *rcdu = plane->dev;
+	struct rcar_du_group *rgrp = plane->group;
 	unsigned int i;
 	int ret = -EBUSY;
 
-	mutex_lock(&rcdu->planes.lock);
+	mutex_lock(&rgrp->planes.lock);
 
-	for (i = 0; i < ARRAY_SIZE(rcdu->planes.planes); ++i) {
-		if (!(rcdu->planes.free & (1 << i)))
+	for (i = 0; i < ARRAY_SIZE(rgrp->planes.planes); ++i) {
+		if (!(rgrp->planes.free & (1 << i)))
 			continue;
 
 		if (format->planes == 1 ||
-		    rcdu->planes.free & (1 << ((i + 1) % 8)))
+		    rgrp->planes.free & (1 << ((i + 1) % 8)))
 			break;
 	}
 
-	if (i == ARRAY_SIZE(rcdu->planes.planes))
+	if (i == ARRAY_SIZE(rgrp->planes.planes))
 		goto done;
 
-	rcdu->planes.free &= ~(1 << i);
+	rgrp->planes.free &= ~(1 << i);
 	if (format->planes == 2)
-		rcdu->planes.free &= ~(1 << ((i + 1) % 8));
+		rgrp->planes.free &= ~(1 << ((i + 1) % 8));
 
 	plane->hwindex = i;
 
 	ret = 0;
 
 done:
-	mutex_unlock(&rcdu->planes.lock);
+	mutex_unlock(&rgrp->planes.lock);
 	return ret;
 }
 
 void rcar_du_plane_release(struct rcar_du_plane *plane)
 {
-	struct rcar_du_device *rcdu = plane->dev;
+	struct rcar_du_group *rgrp = plane->group;
 
 	if (plane->hwindex == -1)
 		return;
 
-	mutex_lock(&rcdu->planes.lock);
-	rcdu->planes.free |= 1 << plane->hwindex;
+	mutex_lock(&rgrp->planes.lock);
+	rgrp->planes.free |= 1 << plane->hwindex;
 	if (plane->format->planes == 2)
-		rcdu->planes.free |= 1 << ((plane->hwindex + 1) % 8);
-	mutex_unlock(&rcdu->planes.lock);
+		rgrp->planes.free |= 1 << ((plane->hwindex + 1) % 8);
+	mutex_unlock(&rgrp->planes.lock);
 
 	plane->hwindex = -1;
 }
 
 void rcar_du_plane_update_base(struct rcar_du_plane *plane)
 {
-	struct rcar_du_device *rcdu = plane->dev;
+	struct rcar_du_group *rgrp = plane->group;
 	unsigned int index = plane->hwindex;
 
-	/* According to the datasheet the Y position is expressed in raster line
-	 * units. However, 32bpp formats seem to require a doubled Y position
-	 * value. Similarly, for the second plane, NV12 and NV21 formats seem to
+	/* The Y position is expressed in raster line units and must be doubled
+	 * for 32bpp formats, according to the R8A7790 datasheet. No mention of
+	 * doubling the Y position is found in the R8A7779 datasheet, but the
+	 * rule seems to apply there as well.
+	 *
+	 * Similarly, for the second plane, NV12 and NV21 formats seem to
 	 * require a halved Y position value.
 	 */
-	rcar_du_plane_write(rcdu, index, PnSPXR, plane->src_x);
-	rcar_du_plane_write(rcdu, index, PnSPYR, plane->src_y *
+	rcar_du_plane_write(rgrp, index, PnSPXR, plane->src_x);
+	rcar_du_plane_write(rgrp, index, PnSPYR, plane->src_y *
 			    (plane->format->bpp == 32 ? 2 : 1));
-	rcar_du_plane_write(rcdu, index, PnDSA0R, plane->dma[0]);
+	rcar_du_plane_write(rgrp, index, PnDSA0R, plane->dma[0]);
 
 	if (plane->format->planes == 2) {
 		index = (index + 1) % 8;
 
-		rcar_du_plane_write(rcdu, index, PnSPXR, plane->src_x);
-		rcar_du_plane_write(rcdu, index, PnSPYR, plane->src_y *
+		rcar_du_plane_write(rgrp, index, PnSPXR, plane->src_x);
+		rcar_du_plane_write(rgrp, index, PnSPYR, plane->src_y *
 				    (plane->format->bpp == 16 ? 2 : 1) / 2);
-		rcar_du_plane_write(rcdu, index, PnDSA0R, plane->dma[1]);
+		rcar_du_plane_write(rgrp, index, PnDSA0R, plane->dma[1]);
 	}
 }
 
@@ -140,7 +145,7 @@
 static void rcar_du_plane_setup_mode(struct rcar_du_plane *plane,
 				     unsigned int index)
 {
-	struct rcar_du_device *rcdu = plane->dev;
+	struct rcar_du_group *rgrp = plane->group;
 	u32 colorkey;
 	u32 pnmr;
 
@@ -154,9 +159,9 @@
 	 * enable alpha-blending regardless of the X bit value.
 	 */
 	if (plane->format->fourcc != DRM_FORMAT_XRGB1555)
-		rcar_du_plane_write(rcdu, index, PnALPHAR, PnALPHAR_ABIT_0);
+		rcar_du_plane_write(rgrp, index, PnALPHAR, PnALPHAR_ABIT_0);
 	else
-		rcar_du_plane_write(rcdu, index, PnALPHAR,
+		rcar_du_plane_write(rgrp, index, PnALPHAR,
 				    PnALPHAR_ABIT_X | plane->alpha);
 
 	pnmr = PnMR_BM_MD | plane->format->pnmr;
@@ -172,14 +177,14 @@
 	if (plane->format->fourcc == DRM_FORMAT_YUYV)
 		pnmr |= PnMR_YCDF_YUYV;
 
-	rcar_du_plane_write(rcdu, index, PnMR, pnmr);
+	rcar_du_plane_write(rgrp, index, PnMR, pnmr);
 
 	switch (plane->format->fourcc) {
 	case DRM_FORMAT_RGB565:
 		colorkey = ((plane->colorkey & 0xf80000) >> 8)
 			 | ((plane->colorkey & 0x00fc00) >> 5)
 			 | ((plane->colorkey & 0x0000f8) >> 3);
-		rcar_du_plane_write(rcdu, index, PnTC2R, colorkey);
+		rcar_du_plane_write(rgrp, index, PnTC2R, colorkey);
 		break;
 
 	case DRM_FORMAT_ARGB1555:
@@ -187,12 +192,12 @@
 		colorkey = ((plane->colorkey & 0xf80000) >> 9)
 			 | ((plane->colorkey & 0x00f800) >> 6)
 			 | ((plane->colorkey & 0x0000f8) >> 3);
-		rcar_du_plane_write(rcdu, index, PnTC2R, colorkey);
+		rcar_du_plane_write(rgrp, index, PnTC2R, colorkey);
 		break;
 
 	case DRM_FORMAT_XRGB8888:
 	case DRM_FORMAT_ARGB8888:
-		rcar_du_plane_write(rcdu, index, PnTC3R,
+		rcar_du_plane_write(rgrp, index, PnTC3R,
 				    PnTC3R_CODE | (plane->colorkey & 0xffffff));
 		break;
 	}
@@ -201,7 +206,7 @@
 static void __rcar_du_plane_setup(struct rcar_du_plane *plane,
 				  unsigned int index)
 {
-	struct rcar_du_device *rcdu = plane->dev;
+	struct rcar_du_group *rgrp = plane->group;
 	u32 ddcr2 = PnDDCR2_CODE;
 	u32 ddcr4;
 	u32 mwr;
@@ -211,7 +216,7 @@
 	 * The data format is selected by the DDDF field in PnMR and the EDF
 	 * field in DDCR4.
 	 */
-	ddcr4 = rcar_du_plane_read(rcdu, index, PnDDCR4);
+	ddcr4 = rcar_du_plane_read(rgrp, index, PnDDCR4);
 	ddcr4 &= ~PnDDCR4_EDF_MASK;
 	ddcr4 |= plane->format->edf | PnDDCR4_CODE;
 
@@ -232,8 +237,8 @@
 		}
 	}
 
-	rcar_du_plane_write(rcdu, index, PnDDCR2, ddcr2);
-	rcar_du_plane_write(rcdu, index, PnDDCR4, ddcr4);
+	rcar_du_plane_write(rgrp, index, PnDDCR2, ddcr2);
+	rcar_du_plane_write(rgrp, index, PnDDCR4, ddcr4);
 
 	/* Memory pitch (expressed in pixels) */
 	if (plane->format->planes == 2)
@@ -241,19 +246,19 @@
 	else
 		mwr = plane->pitch * 8 / plane->format->bpp;
 
-	rcar_du_plane_write(rcdu, index, PnMWR, mwr);
+	rcar_du_plane_write(rgrp, index, PnMWR, mwr);
 
 	/* Destination position and size */
-	rcar_du_plane_write(rcdu, index, PnDSXR, plane->width);
-	rcar_du_plane_write(rcdu, index, PnDSYR, plane->height);
-	rcar_du_plane_write(rcdu, index, PnDPXR, plane->dst_x);
-	rcar_du_plane_write(rcdu, index, PnDPYR, plane->dst_y);
+	rcar_du_plane_write(rgrp, index, PnDSXR, plane->width);
+	rcar_du_plane_write(rgrp, index, PnDSYR, plane->height);
+	rcar_du_plane_write(rgrp, index, PnDPXR, plane->dst_x);
+	rcar_du_plane_write(rgrp, index, PnDPYR, plane->dst_y);
 
 	/* Wrap-around and blinking, disabled */
-	rcar_du_plane_write(rcdu, index, PnWASPR, 0);
-	rcar_du_plane_write(rcdu, index, PnWAMWR, 4095);
-	rcar_du_plane_write(rcdu, index, PnBTR, 0);
-	rcar_du_plane_write(rcdu, index, PnMLR, 0);
+	rcar_du_plane_write(rgrp, index, PnWASPR, 0);
+	rcar_du_plane_write(rgrp, index, PnWAMWR, 4095);
+	rcar_du_plane_write(rgrp, index, PnBTR, 0);
+	rcar_du_plane_write(rgrp, index, PnMLR, 0);
 }
 
 void rcar_du_plane_setup(struct rcar_du_plane *plane)
@@ -273,7 +278,7 @@
 		       uint32_t src_w, uint32_t src_h)
 {
 	struct rcar_du_plane *rplane = to_rcar_plane(plane);
-	struct rcar_du_device *rcdu = plane->dev->dev_private;
+	struct rcar_du_device *rcdu = rplane->group->dev;
 	const struct rcar_du_format_info *format;
 	unsigned int nplanes;
 	int ret;
@@ -316,26 +321,25 @@
 	rcar_du_plane_compute_base(rplane, fb);
 	rcar_du_plane_setup(rplane);
 
-	mutex_lock(&rcdu->planes.lock);
+	mutex_lock(&rplane->group->planes.lock);
 	rplane->enabled = true;
 	rcar_du_crtc_update_planes(rplane->crtc);
-	mutex_unlock(&rcdu->planes.lock);
+	mutex_unlock(&rplane->group->planes.lock);
 
 	return 0;
 }
 
 static int rcar_du_plane_disable(struct drm_plane *plane)
 {
-	struct rcar_du_device *rcdu = plane->dev->dev_private;
 	struct rcar_du_plane *rplane = to_rcar_plane(plane);
 
 	if (!rplane->enabled)
 		return 0;
 
-	mutex_lock(&rcdu->planes.lock);
+	mutex_lock(&rplane->group->planes.lock);
 	rplane->enabled = false;
 	rcar_du_crtc_update_planes(rplane->crtc);
-	mutex_unlock(&rcdu->planes.lock);
+	mutex_unlock(&rplane->group->planes.lock);
 
 	rcar_du_plane_release(rplane);
 
@@ -377,9 +381,7 @@
 static void rcar_du_plane_set_zpos(struct rcar_du_plane *plane,
 				   unsigned int zpos)
 {
-	struct rcar_du_device *rcdu = plane->dev;
-
-	mutex_lock(&rcdu->planes.lock);
+	mutex_lock(&plane->group->planes.lock);
 	if (plane->zpos == zpos)
 		goto done;
 
@@ -390,21 +392,21 @@
 	rcar_du_crtc_update_planes(plane->crtc);
 
 done:
-	mutex_unlock(&rcdu->planes.lock);
+	mutex_unlock(&plane->group->planes.lock);
 }
 
 static int rcar_du_plane_set_property(struct drm_plane *plane,
 				      struct drm_property *property,
 				      uint64_t value)
 {
-	struct rcar_du_device *rcdu = plane->dev->dev_private;
 	struct rcar_du_plane *rplane = to_rcar_plane(plane);
+	struct rcar_du_group *rgrp = rplane->group;
 
-	if (property == rcdu->planes.alpha)
+	if (property == rgrp->planes.alpha)
 		rcar_du_plane_set_alpha(rplane, value);
-	else if (property == rcdu->planes.colorkey)
+	else if (property == rgrp->planes.colorkey)
 		rcar_du_plane_set_colorkey(rplane, value);
-	else if (property == rcdu->planes.zpos)
+	else if (property == rgrp->planes.zpos)
 		rcar_du_plane_set_zpos(rplane, value);
 	else
 		return -EINVAL;
@@ -432,37 +434,39 @@
 	DRM_FORMAT_NV16,
 };
 
-int rcar_du_plane_init(struct rcar_du_device *rcdu)
+int rcar_du_planes_init(struct rcar_du_group *rgrp)
 {
+	struct rcar_du_planes *planes = &rgrp->planes;
+	struct rcar_du_device *rcdu = rgrp->dev;
 	unsigned int i;
 
-	mutex_init(&rcdu->planes.lock);
-	rcdu->planes.free = 0xff;
+	mutex_init(&planes->lock);
+	planes->free = 0xff;
 
-	rcdu->planes.alpha =
+	planes->alpha =
 		drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255);
-	if (rcdu->planes.alpha == NULL)
+	if (planes->alpha == NULL)
 		return -ENOMEM;
 
 	/* The color key is expressed as an RGB888 triplet stored in a 32-bit
 	 * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0)
 	 * or enable source color keying (1).
 	 */
-	rcdu->planes.colorkey =
+	planes->colorkey =
 		drm_property_create_range(rcdu->ddev, 0, "colorkey",
 					  0, 0x01ffffff);
-	if (rcdu->planes.colorkey == NULL)
+	if (planes->colorkey == NULL)
 		return -ENOMEM;
 
-	rcdu->planes.zpos =
+	planes->zpos =
 		drm_property_create_range(rcdu->ddev, 0, "zpos", 1, 7);
-	if (rcdu->planes.zpos == NULL)
+	if (planes->zpos == NULL)
 		return -ENOMEM;
 
-	for (i = 0; i < ARRAY_SIZE(rcdu->planes.planes); ++i) {
-		struct rcar_du_plane *plane = &rcdu->planes.planes[i];
+	for (i = 0; i < ARRAY_SIZE(planes->planes); ++i) {
+		struct rcar_du_plane *plane = &planes->planes[i];
 
-		plane->dev = rcdu;
+		plane->group = rgrp;
 		plane->hwindex = -1;
 		plane->alpha = 255;
 		plane->colorkey = RCAR_DU_COLORKEY_NONE;
@@ -472,11 +476,16 @@
 	return 0;
 }
 
-int rcar_du_plane_register(struct rcar_du_device *rcdu)
+int rcar_du_planes_register(struct rcar_du_group *rgrp)
 {
+	struct rcar_du_planes *planes = &rgrp->planes;
+	struct rcar_du_device *rcdu = rgrp->dev;
+	unsigned int crtcs;
 	unsigned int i;
 	int ret;
 
+	crtcs = ((1 << rcdu->num_crtcs) - 1) & (3 << (2 * rgrp->index));
+
 	for (i = 0; i < RCAR_DU_NUM_KMS_PLANES; ++i) {
 		struct rcar_du_kms_plane *plane;
 
@@ -484,23 +493,22 @@
 		if (plane == NULL)
 			return -ENOMEM;
 
-		plane->hwplane = &rcdu->planes.planes[i + 2];
+		plane->hwplane = &planes->planes[i + 2];
 		plane->hwplane->zpos = 1;
 
-		ret = drm_plane_init(rcdu->ddev, &plane->plane,
-				     (1 << rcdu->num_crtcs) - 1,
+		ret = drm_plane_init(rcdu->ddev, &plane->plane, crtcs,
 				     &rcar_du_plane_funcs, formats,
 				     ARRAY_SIZE(formats), false);
 		if (ret < 0)
 			return ret;
 
 		drm_object_attach_property(&plane->plane.base,
-					   rcdu->planes.alpha, 255);
+					   planes->alpha, 255);
 		drm_object_attach_property(&plane->plane.base,
-					   rcdu->planes.colorkey,
+					   planes->colorkey,
 					   RCAR_DU_COLORKEY_NONE);
 		drm_object_attach_property(&plane->plane.base,
-					   rcdu->planes.zpos, 1);
+					   planes->zpos, 1);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.h b/drivers/gpu/drm/rcar-du/rcar_du_plane.h
index 5397dba..f94f9ce 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_plane.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.h
@@ -14,10 +14,13 @@
 #ifndef __RCAR_DU_PLANE_H__
 #define __RCAR_DU_PLANE_H__
 
-struct drm_crtc;
-struct drm_framebuffer;
-struct rcar_du_device;
+#include <linux/mutex.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+
 struct rcar_du_format_info;
+struct rcar_du_group;
 
 /* The RCAR DU has 8 hardware planes, shared between KMS planes and CRTCs. As
  * using KMS planes requires at least one of the CRTCs being enabled, no more
@@ -30,7 +33,7 @@
 #define RCAR_DU_NUM_SW_PLANES		9
 
 struct rcar_du_plane {
-	struct rcar_du_device *dev;
+	struct rcar_du_group *group;
 	struct drm_crtc *crtc;
 
 	bool enabled;
@@ -54,8 +57,19 @@
 	unsigned int dst_y;
 };
 
-int rcar_du_plane_init(struct rcar_du_device *rcdu);
-int rcar_du_plane_register(struct rcar_du_device *rcdu);
+struct rcar_du_planes {
+	struct rcar_du_plane planes[RCAR_DU_NUM_SW_PLANES];
+	unsigned int free;
+	struct mutex lock;
+
+	struct drm_property *alpha;
+	struct drm_property *colorkey;
+	struct drm_property *zpos;
+};
+
+int rcar_du_planes_init(struct rcar_du_group *rgrp);
+int rcar_du_planes_register(struct rcar_du_group *rgrp);
+
 void rcar_du_plane_setup(struct rcar_du_plane *plane);
 void rcar_du_plane_update_base(struct rcar_du_plane *plane);
 void rcar_du_plane_compute_base(struct rcar_du_plane *plane,
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
index 69f21f1..73f7347 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
@@ -13,14 +13,15 @@
 #ifndef __RCAR_DU_REGS_H__
 #define __RCAR_DU_REGS_H__
 
-#define DISP2_REG_OFFSET	 0x30000
+#define DU0_REG_OFFSET		0x00000
+#define DU1_REG_OFFSET		0x30000
+#define DU2_REG_OFFSET		0x40000
 
 /* -----------------------------------------------------------------------------
  * Display Control Registers
  */
 
 #define DSYSR			0x00000	/* display 1 */
-#define D2SYSR			0x30000	/* display 2 */
 #define DSYSR_ILTS		(1 << 29)
 #define DSYSR_DSEC		(1 << 20)
 #define DSYSR_IUPD		(1 << 16)
@@ -35,7 +36,6 @@
 #define DSYSR_SCM_INT_VIDEO	(3 << 4)
 
 #define DSMR			0x00004
-#define D2SMR			0x30004
 #define DSMR_VSPM		(1 << 28)
 #define DSMR_ODPM		(1 << 27)
 #define DSMR_DIPM_DISP		(0 << 25)
@@ -60,7 +60,6 @@
 #define DSMR_CSY_MASK		(3 << 6)
 
 #define DSSR			0x00008
-#define D2SSR			0x30008
 #define DSSR_VC1FB_DSA0		(0 << 30)
 #define DSSR_VC1FB_DSA1		(1 << 30)
 #define DSSR_VC1FB_DSA2		(2 << 30)
@@ -80,7 +79,6 @@
 #define DSSR_ADC(n)		(1 << ((n)-1))
 
 #define DSRCR			0x0000c
-#define D2SRCR			0x3000c
 #define DSRCR_TVCL		(1 << 15)
 #define DSRCR_FRCL		(1 << 14)
 #define DSRCR_VBCL		(1 << 11)
@@ -90,7 +88,6 @@
 #define DSRCR_MASK		0x0000cbff
 
 #define DIER			0x00010
-#define D2IER			0x30010
 #define DIER_TVE		(1 << 15)
 #define DIER_FRE		(1 << 14)
 #define DIER_VBE		(1 << 11)
@@ -114,7 +111,6 @@
 #define DPPR_BPP32		(DPPR_BPP32_P1 | DPPR_BPP32_P2)	/* plane1 & 2 */
 
 #define DEFR			0x00020
-#define D2EFR			0x30020
 #define DEFR_CODE		(0x7773 << 16)
 #define DEFR_EXSL		(1 << 12)
 #define DEFR_EXVL		(1 << 11)
@@ -137,12 +133,10 @@
 #define DCPCR_DCE		(1 << 0)
 
 #define DEFR2			0x00034
-#define D2EFR2			0x30034
 #define DEFR2_CODE		(0x7775 << 16)
 #define DEFR2_DEFE2G		(1 << 0)
 
 #define DEFR3			0x00038
-#define D2EFR3			0x30038
 #define DEFR3_CODE		(0x7776 << 16)
 #define DEFR3_EVDA		(1 << 14)
 #define DEFR3_EVDM_1		(1 << 12)
@@ -153,7 +147,6 @@
 #define DEFR3_DEFE3		(1 << 0)
 
 #define DEFR4			0x0003c
-#define D2EFR4			0x3003c
 #define DEFR4_CODE		(0x7777 << 16)
 #define DEFR4_LRUO		(1 << 5)
 #define DEFR4_SPCE		(1 << 4)
@@ -205,6 +198,68 @@
 #define DEFR6_DEFAULT		(DEFR6_CODE | DEFR6_TCNE2)
 
 /* -----------------------------------------------------------------------------
+ * R8A7790-only Control Registers
+ */
+
+#define DD1SSR			0x20008
+#define DD1SSR_TVR		(1 << 15)
+#define DD1SSR_FRM		(1 << 14)
+#define DD1SSR_BUF		(1 << 12)
+#define DD1SSR_VBK		(1 << 11)
+#define DD1SSR_RINT		(1 << 9)
+#define DD1SSR_HBK		(1 << 8)
+#define DD1SSR_ADC(n)		(1 << ((n)-1))
+
+#define DD1SRCR			0x2000c
+#define DD1SRCR_TVR		(1 << 15)
+#define DD1SRCR_FRM		(1 << 14)
+#define DD1SRCR_BUF		(1 << 12)
+#define DD1SRCR_VBK		(1 << 11)
+#define DD1SRCR_RINT		(1 << 9)
+#define DD1SRCR_HBK		(1 << 8)
+#define DD1SRCR_ADC(n)		(1 << ((n)-1))
+
+#define DD1IER			0x20010
+#define DD1IER_TVR		(1 << 15)
+#define DD1IER_FRM		(1 << 14)
+#define DD1IER_BUF		(1 << 12)
+#define DD1IER_VBK		(1 << 11)
+#define DD1IER_RINT		(1 << 9)
+#define DD1IER_HBK		(1 << 8)
+#define DD1IER_ADC(n)		(1 << ((n)-1))
+
+#define DEFR8			0x20020
+#define DEFR8_CODE		(0x7790 << 16)
+#define DEFR8_VSCS		(1 << 6)
+#define DEFR8_DRGBS_DU(n)	((n) << 4)
+#define DEFR8_DRGBS_MASK	(3 << 4)
+#define DEFR8_DEFE8		(1 << 0)
+
+#define DOFLR			0x20024
+#define DOFLR_CODE		(0x7790 << 16)
+#define DOFLR_HSYCFL1		(1 << 13)
+#define DOFLR_VSYCFL1		(1 << 12)
+#define DOFLR_ODDFL1		(1 << 11)
+#define DOFLR_DISPFL1		(1 << 10)
+#define DOFLR_CDEFL1		(1 << 9)
+#define DOFLR_RGBFL1		(1 << 8)
+#define DOFLR_HSYCFL0		(1 << 5)
+#define DOFLR_VSYCFL0		(1 << 4)
+#define DOFLR_ODDFL0		(1 << 3)
+#define DOFLR_DISPFL0		(1 << 2)
+#define DOFLR_CDEFL0		(1 << 1)
+#define DOFLR_RGBFL0		(1 << 0)
+
+#define DIDSR			0x20028
+#define DIDSR_CODE		(0x7790 << 16)
+#define DIDSR_LCDS_DCLKIN(n)	(0 << (8 + (n) * 2))
+#define DIDSR_LCDS_LVDS0(n)	(2 << (8 + (n) * 2))
+#define DIDSR_LCDS_LVDS1(n)	(3 << (8 + (n) * 2))
+#define DIDSR_LCDS_MASK(n)	(3 << (8 + (n) * 2))
+#define DIDSR_PCDS_CLK(n, clk)	(clk << ((n) * 2))
+#define DIDSR_PCDS_MASK(n)	(3 << ((n) * 2))
+
+/* -----------------------------------------------------------------------------
  * Display Timing Generation Registers
  */
 
@@ -349,21 +404,34 @@
 #define APnMR_BM_AD		(2 << 4)	/* Auto Display Change Mode */
 
 #define APnMWR			0x0a104
+
+#define APnDSXR			0x0a110
+#define APnDSYR			0x0a114
+#define APnDPXR			0x0a118
+#define APnDPYR			0x0a11c
+
 #define APnDSA0R		0x0a120
 #define APnDSA1R		0x0a124
 #define APnDSA2R		0x0a128
+
+#define APnSPXR			0x0a130
+#define APnSPYR			0x0a134
+#define APnWASPR		0x0a138
+#define APnWAMWR		0x0a13c
+
+#define APnBTR			0x0a140
+
 #define APnMLR			0x0a150
+#define APnSWAPR		0x0a180
 
 /* -----------------------------------------------------------------------------
  * Display Capture Registers
  */
 
+#define DCMR			0x0c100
 #define DCMWR			0x0c104
-#define DC2MWR			0x0c204
 #define DCSAR			0x0c120
-#define DC2SAR			0x0c220
 #define DCMLR			0x0c150
-#define DC2MLR			0x0c250
 
 /* -----------------------------------------------------------------------------
  * Color Palette Registers
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vga.c b/drivers/gpu/drm/rcar-du/rcar_du_vga.c
deleted file mode 100644
index 327289e..0000000
--- a/drivers/gpu/drm/rcar-du/rcar_du_vga.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * rcar_du_vga.c  --  R-Car Display Unit VGA DAC and Connector
- *
- * Copyright (C) 2013 Renesas Corporation
- *
- * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
-
-#include "rcar_du_drv.h"
-#include "rcar_du_kms.h"
-#include "rcar_du_vga.h"
-
-/* -----------------------------------------------------------------------------
- * Connector
- */
-
-static int rcar_du_vga_connector_get_modes(struct drm_connector *connector)
-{
-	return 0;
-}
-
-static int rcar_du_vga_connector_mode_valid(struct drm_connector *connector,
-					    struct drm_display_mode *mode)
-{
-	return MODE_OK;
-}
-
-static const struct drm_connector_helper_funcs connector_helper_funcs = {
-	.get_modes = rcar_du_vga_connector_get_modes,
-	.mode_valid = rcar_du_vga_connector_mode_valid,
-	.best_encoder = rcar_du_connector_best_encoder,
-};
-
-static void rcar_du_vga_connector_destroy(struct drm_connector *connector)
-{
-	drm_sysfs_connector_remove(connector);
-	drm_connector_cleanup(connector);
-}
-
-static enum drm_connector_status
-rcar_du_vga_connector_detect(struct drm_connector *connector, bool force)
-{
-	return connector_status_unknown;
-}
-
-static const struct drm_connector_funcs connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
-	.detect = rcar_du_vga_connector_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = rcar_du_vga_connector_destroy,
-};
-
-static int rcar_du_vga_connector_init(struct rcar_du_device *rcdu,
-				      struct rcar_du_encoder *renc)
-{
-	struct rcar_du_connector *rcon;
-	struct drm_connector *connector;
-	int ret;
-
-	rcon = devm_kzalloc(rcdu->dev, sizeof(*rcon), GFP_KERNEL);
-	if (rcon == NULL)
-		return -ENOMEM;
-
-	connector = &rcon->connector;
-	connector->display_info.width_mm = 0;
-	connector->display_info.height_mm = 0;
-
-	ret = drm_connector_init(rcdu->ddev, connector, &connector_funcs,
-				 DRM_MODE_CONNECTOR_VGA);
-	if (ret < 0)
-		return ret;
-
-	drm_connector_helper_add(connector, &connector_helper_funcs);
-	ret = drm_sysfs_connector_add(connector);
-	if (ret < 0)
-		return ret;
-
-	drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-	drm_object_property_set_value(&connector->base,
-		rcdu->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF);
-
-	ret = drm_mode_connector_attach_encoder(connector, &renc->encoder);
-	if (ret < 0)
-		return ret;
-
-	connector->encoder = &renc->encoder;
-	rcon->encoder = renc;
-
-	return 0;
-}
-
-/* -----------------------------------------------------------------------------
- * Encoder
- */
-
-static void rcar_du_vga_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-}
-
-static bool rcar_du_vga_encoder_mode_fixup(struct drm_encoder *encoder,
-					   const struct drm_display_mode *mode,
-					   struct drm_display_mode *adjusted_mode)
-{
-	return true;
-}
-
-static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
-	.dpms = rcar_du_vga_encoder_dpms,
-	.mode_fixup = rcar_du_vga_encoder_mode_fixup,
-	.prepare = rcar_du_encoder_mode_prepare,
-	.commit = rcar_du_encoder_mode_commit,
-	.mode_set = rcar_du_encoder_mode_set,
-};
-
-static const struct drm_encoder_funcs encoder_funcs = {
-	.destroy = drm_encoder_cleanup,
-};
-
-int rcar_du_vga_init(struct rcar_du_device *rcdu,
-		     const struct rcar_du_encoder_vga_data *data,
-		     unsigned int output)
-{
-	struct rcar_du_encoder *renc;
-	int ret;
-
-	renc = devm_kzalloc(rcdu->dev, sizeof(*renc), GFP_KERNEL);
-	if (renc == NULL)
-		return -ENOMEM;
-
-	renc->output = output;
-
-	ret = drm_encoder_init(rcdu->ddev, &renc->encoder, &encoder_funcs,
-			       DRM_MODE_ENCODER_DAC);
-	if (ret < 0)
-		return ret;
-
-	drm_encoder_helper_add(&renc->encoder, &encoder_helper_funcs);
-
-	return rcar_du_vga_connector_init(rcdu, renc);
-}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vga.h b/drivers/gpu/drm/rcar-du/rcar_du_vga.h
deleted file mode 100644
index 66b4d2d..0000000
--- a/drivers/gpu/drm/rcar-du/rcar_du_vga.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * rcar_du_vga.h  --  R-Car Display Unit VGA DAC and Connector
- *
- * Copyright (C) 2013 Renesas Corporation
- *
- * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __RCAR_DU_VGA_H__
-#define __RCAR_DU_VGA_H__
-
-struct rcar_du_device;
-struct rcar_du_encoder_vga_data;
-
-int rcar_du_vga_init(struct rcar_du_device *rcdu,
-		     const struct rcar_du_encoder_vga_data *data,
-		     unsigned int output);
-
-#endif /* __RCAR_DU_VGA_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
new file mode 100644
index 0000000..41d563a
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.c
@@ -0,0 +1,96 @@
+/*
+ * rcar_du_vgacon.c  --  R-Car Display Unit VGA Connector
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_crtc_helper.h>
+
+#include "rcar_du_drv.h"
+#include "rcar_du_encoder.h"
+#include "rcar_du_kms.h"
+#include "rcar_du_vgacon.h"
+
+static int rcar_du_vga_connector_get_modes(struct drm_connector *connector)
+{
+	return 0;
+}
+
+static int rcar_du_vga_connector_mode_valid(struct drm_connector *connector,
+					    struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs connector_helper_funcs = {
+	.get_modes = rcar_du_vga_connector_get_modes,
+	.mode_valid = rcar_du_vga_connector_mode_valid,
+	.best_encoder = rcar_du_connector_best_encoder,
+};
+
+static void rcar_du_vga_connector_destroy(struct drm_connector *connector)
+{
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+}
+
+static enum drm_connector_status
+rcar_du_vga_connector_detect(struct drm_connector *connector, bool force)
+{
+	return connector_status_connected;
+}
+
+static const struct drm_connector_funcs connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = rcar_du_vga_connector_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = rcar_du_vga_connector_destroy,
+};
+
+int rcar_du_vga_connector_init(struct rcar_du_device *rcdu,
+			       struct rcar_du_encoder *renc)
+{
+	struct rcar_du_connector *rcon;
+	struct drm_connector *connector;
+	int ret;
+
+	rcon = devm_kzalloc(rcdu->dev, sizeof(*rcon), GFP_KERNEL);
+	if (rcon == NULL)
+		return -ENOMEM;
+
+	connector = &rcon->connector;
+	connector->display_info.width_mm = 0;
+	connector->display_info.height_mm = 0;
+
+	ret = drm_connector_init(rcdu->ddev, connector, &connector_funcs,
+				 DRM_MODE_CONNECTOR_VGA);
+	if (ret < 0)
+		return ret;
+
+	drm_connector_helper_add(connector, &connector_helper_funcs);
+	ret = drm_sysfs_connector_add(connector);
+	if (ret < 0)
+		return ret;
+
+	drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+	drm_object_property_set_value(&connector->base,
+		rcdu->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF);
+
+	ret = drm_mode_connector_attach_encoder(connector, &renc->encoder);
+	if (ret < 0)
+		return ret;
+
+	connector->encoder = &renc->encoder;
+	rcon->encoder = renc;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vgacon.h b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.h
new file mode 100644
index 0000000..b12b0cf
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vgacon.h
@@ -0,0 +1,23 @@
+/*
+ * rcar_du_vgacon.h  --  R-Car Display Unit VGA Connector
+ *
+ * Copyright (C) 2013 Renesas Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __RCAR_DU_VGACON_H__
+#define __RCAR_DU_VGACON_H__
+
+struct rcar_du_device;
+struct rcar_du_encoder;
+
+int rcar_du_vga_connector_init(struct rcar_du_device *rcdu,
+			       struct rcar_du_encoder *renc);
+
+#endif /* __RCAR_DU_VGACON_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_lvds_regs.h b/drivers/gpu/drm/rcar-du/rcar_lvds_regs.h
new file mode 100644
index 0000000..77cf928
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rcar_lvds_regs.h
@@ -0,0 +1,69 @@
+/*
+ * rcar_lvds_regs.h  --  R-Car LVDS Interface Registers Definitions
+ *
+ * Copyright (C) 2013 Renesas Electronics Corporation
+ *
+ * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __RCAR_LVDS_REGS_H__
+#define __RCAR_LVDS_REGS_H__
+
+#define LVDCR0				0x0000
+#define LVDCR0_DUSEL			(1 << 15)
+#define LVDCR0_DMD			(1 << 12)
+#define LVDCR0_LVMD_MASK		(0xf << 8)
+#define LVDCR0_LVMD_SHIFT		8
+#define LVDCR0_PLLEN			(1 << 4)
+#define LVDCR0_BEN			(1 << 2)
+#define LVDCR0_LVEN			(1 << 1)
+#define LVDCR0_LVRES			(1 << 0)
+
+#define LVDCR1				0x0004
+#define LVDCR1_CKSEL			(1 << 15)
+#define LVDCR1_CHSTBY(n)		(3 << (2 + (n) * 2))
+#define LVDCR1_CLKSTBY			(3 << 0)
+
+#define LVDPLLCR			0x0008
+#define LVDPLLCR_CEEN			(1 << 14)
+#define LVDPLLCR_FBEN			(1 << 13)
+#define LVDPLLCR_COSEL			(1 << 12)
+#define LVDPLLCR_PLLDLYCNT_150M		(0x1bf << 0)
+#define LVDPLLCR_PLLDLYCNT_121M		(0x22c << 0)
+#define LVDPLLCR_PLLDLYCNT_60M		(0x77b << 0)
+#define LVDPLLCR_PLLDLYCNT_38M		(0x69a << 0)
+#define LVDPLLCR_PLLDLYCNT_MASK		(0x7ff << 0)
+
+#define LVDCTRCR			0x000c
+#define LVDCTRCR_CTR3SEL_ZERO		(0 << 12)
+#define LVDCTRCR_CTR3SEL_ODD		(1 << 12)
+#define LVDCTRCR_CTR3SEL_CDE		(2 << 12)
+#define LVDCTRCR_CTR3SEL_MASK		(7 << 12)
+#define LVDCTRCR_CTR2SEL_DISP		(0 << 8)
+#define LVDCTRCR_CTR2SEL_ODD		(1 << 8)
+#define LVDCTRCR_CTR2SEL_CDE		(2 << 8)
+#define LVDCTRCR_CTR2SEL_HSYNC		(3 << 8)
+#define LVDCTRCR_CTR2SEL_VSYNC		(4 << 8)
+#define LVDCTRCR_CTR2SEL_MASK		(7 << 8)
+#define LVDCTRCR_CTR1SEL_VSYNC		(0 << 4)
+#define LVDCTRCR_CTR1SEL_DISP		(1 << 4)
+#define LVDCTRCR_CTR1SEL_ODD		(2 << 4)
+#define LVDCTRCR_CTR1SEL_CDE		(3 << 4)
+#define LVDCTRCR_CTR1SEL_HSYNC		(4 << 4)
+#define LVDCTRCR_CTR1SEL_MASK		(7 << 4)
+#define LVDCTRCR_CTR0SEL_HSYNC		(0 << 0)
+#define LVDCTRCR_CTR0SEL_VSYNC		(1 << 0)
+#define LVDCTRCR_CTR0SEL_DISP		(2 << 0)
+#define LVDCTRCR_CTR0SEL_ODD		(3 << 0)
+#define LVDCTRCR_CTR0SEL_CDE		(4 << 0)
+#define LVDCTRCR_CTR0SEL_MASK		(7 << 0)
+
+#define LVDCHCR				0x0010
+#define LVDCHCR_CHSEL_CH(n, c)		((((c) - (n)) & 3) << ((n) * 4))
+#define LVDCHCR_CHSEL_MASK(n)		(3 << ((n) * 4))
+
+#endif /* __RCAR_LVDS_REGS_H__ */
diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
index bd6b2cf..b17d071 100644
--- a/drivers/gpu/drm/savage/savage_bci.c
+++ b/drivers/gpu/drm/savage/savage_bci.c
@@ -1072,7 +1072,7 @@
 		drm_idlelock_release(&file_priv->master->lock);
 }
 
-struct drm_ioctl_desc savage_ioctls[] = {
+const struct drm_ioctl_desc savage_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_INIT, savage_bci_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_CMDBUF, savage_bci_cmdbuf, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(SAVAGE_BCI_EVENT_EMIT, savage_bci_event_emit, DRM_AUTH),
diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c
index 71b2081..3c03021 100644
--- a/drivers/gpu/drm/savage/savage_drv.c
+++ b/drivers/gpu/drm/savage/savage_drv.c
@@ -42,7 +42,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -51,7 +50,7 @@
 
 static struct drm_driver driver = {
 	.driver_features =
-	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_HAVE_DMA | DRIVER_PCI_DMA,
+	    DRIVER_USE_AGP | DRIVER_HAVE_DMA | DRIVER_PCI_DMA,
 	.dev_priv_size = sizeof(drm_savage_buf_priv_t),
 	.load = savage_driver_load,
 	.firstopen = savage_driver_firstopen,
diff --git a/drivers/gpu/drm/savage/savage_drv.h b/drivers/gpu/drm/savage/savage_drv.h
index c05082a..335f8fc 100644
--- a/drivers/gpu/drm/savage/savage_drv.h
+++ b/drivers/gpu/drm/savage/savage_drv.h
@@ -104,7 +104,7 @@
 	S3_LAST
 };
 
-extern struct drm_ioctl_desc savage_ioctls[];
+extern const struct drm_ioctl_desc savage_ioctls[];
 extern int savage_max_ioctl;
 
 #define S3_SAVAGE3D_SERIES(chip)  ((chip>=S3_SAVAGE3D) && (chip<=S3_SAVAGE_MX))
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index 99e2034..54bad98 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
@@ -465,7 +465,8 @@
 
 static int shmob_drm_crtc_page_flip(struct drm_crtc *crtc,
 				    struct drm_framebuffer *fb,
-				    struct drm_pending_vblank_event *event)
+				    struct drm_pending_vblank_event *event,
+				    uint32_t page_flip_flags)
 {
 	struct shmob_drm_crtc *scrtc = to_shmob_crtc(crtc);
 	struct drm_device *dev = scrtc->crtc.dev;
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
index 5f83f9a..0155518 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
@@ -257,7 +257,6 @@
 #endif
 	.poll		= drm_poll,
 	.read		= drm_read,
-	.fasync		= drm_fasync,
 	.llseek		= no_llseek,
 	.mmap		= drm_gem_cma_mmap,
 };
@@ -285,7 +284,7 @@
 	.gem_prime_mmap		= drm_gem_cma_prime_mmap,
 	.dumb_create		= drm_gem_cma_dumb_create,
 	.dumb_map_offset	= drm_gem_cma_dumb_map_offset,
-	.dumb_destroy		= drm_gem_cma_dumb_destroy,
+	.dumb_destroy		= drm_gem_dumb_destroy,
 	.fops			= &shmob_drm_fops,
 	.name			= "shmob-drm",
 	.desc			= "Renesas SH Mobile DRM",
diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c
index 5a5325e..4383b74 100644
--- a/drivers/gpu/drm/sis/sis_drv.c
+++ b/drivers/gpu/drm/sis/sis_drv.c
@@ -72,7 +72,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -103,7 +102,7 @@
 }
 
 static struct drm_driver driver = {
-	.driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR,
+	.driver_features = DRIVER_USE_AGP,
 	.load = sis_driver_load,
 	.unload = sis_driver_unload,
 	.open = sis_driver_open,
diff --git a/drivers/gpu/drm/sis/sis_drv.h b/drivers/gpu/drm/sis/sis_drv.h
index 13b527b..c31c025 100644
--- a/drivers/gpu/drm/sis/sis_drv.h
+++ b/drivers/gpu/drm/sis/sis_drv.h
@@ -70,7 +70,7 @@
 				       struct drm_file *file_priv);
 extern void sis_lastclose(struct drm_device *dev);
 
-extern struct drm_ioctl_desc sis_ioctls[];
+extern const struct drm_ioctl_desc sis_ioctls[];
 extern int sis_max_ioctl;
 
 #endif
diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 9a43d98..01857d8 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c
@@ -109,7 +109,8 @@
 	if (pool == AGP_TYPE) {
 		retval = drm_mm_insert_node(&dev_priv->agp_mm,
 					    &item->mm_node,
-					    mem->size, 0);
+					    mem->size, 0,
+					    DRM_MM_SEARCH_DEFAULT);
 		offset = item->mm_node.start;
 	} else {
 #if defined(CONFIG_FB_SIS) || defined(CONFIG_FB_SIS_MODULE)
@@ -121,7 +122,8 @@
 #else
 		retval = drm_mm_insert_node(&dev_priv->vram_mm,
 					    &item->mm_node,
-					    mem->size, 0);
+					    mem->size, 0,
+					    DRM_MM_SEARCH_DEFAULT);
 		offset = item->mm_node.start;
 #endif
 	}
@@ -348,7 +350,7 @@
 	return;
 }
 
-struct drm_ioctl_desc sis_ioctls[] = {
+const struct drm_ioctl_desc sis_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(SIS_FB_ALLOC, sis_fb_alloc, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(SIS_FB_FREE, sis_drm_free, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(SIS_AGP_INIT, sis_ioctl_agp_init, DRM_AUTH | DRM_MASTER | DRM_ROOT_ONLY),
diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c
index ddfa743..3492ca5 100644
--- a/drivers/gpu/drm/tdfx/tdfx_drv.c
+++ b/drivers/gpu/drm/tdfx/tdfx_drv.c
@@ -48,7 +48,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -56,7 +55,6 @@
 };
 
 static struct drm_driver driver = {
-	.driver_features = DRIVER_USE_MTRR,
 	.fops = &tdfx_driver_fops,
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 7418dcd..d36efc1 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -15,7 +15,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/kfifo.h>
+#include "drm_flip_work.h"
 
 #include "tilcdc_drv.h"
 #include "tilcdc_regs.h"
@@ -35,21 +35,18 @@
 	struct drm_framebuffer *scanout[2];
 
 	/* for deferred fb unref's: */
-	DECLARE_KFIFO_PTR(unref_fifo, struct drm_framebuffer *);
-	struct work_struct work;
+	struct drm_flip_work unref_work;
 };
 #define to_tilcdc_crtc(x) container_of(x, struct tilcdc_crtc, base)
 
-static void unref_worker(struct work_struct *work)
+static void unref_worker(struct drm_flip_work *work, void *val)
 {
 	struct tilcdc_crtc *tilcdc_crtc =
-		container_of(work, struct tilcdc_crtc, work);
+		container_of(work, struct tilcdc_crtc, unref_work);
 	struct drm_device *dev = tilcdc_crtc->base.dev;
-	struct drm_framebuffer *fb;
 
 	mutex_lock(&dev->mode_config.mutex);
-	while (kfifo_get(&tilcdc_crtc->unref_fifo, &fb))
-		drm_framebuffer_unreference(fb);
+	drm_framebuffer_unreference(val);
 	mutex_unlock(&dev->mode_config.mutex);
 }
 
@@ -68,19 +65,14 @@
 	};
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
+	struct tilcdc_drm_private *priv = dev->dev_private;
 
 	pm_runtime_get_sync(dev->dev);
 	tilcdc_write(dev, base_reg[n], tilcdc_crtc->start);
 	tilcdc_write(dev, ceil_reg[n], tilcdc_crtc->end);
 	if (tilcdc_crtc->scanout[n]) {
-		if (kfifo_put(&tilcdc_crtc->unref_fifo,
-				(const struct drm_framebuffer **)&tilcdc_crtc->scanout[n])) {
-			struct tilcdc_drm_private *priv = dev->dev_private;
-			queue_work(priv->wq, &tilcdc_crtc->work);
-		} else {
-			dev_err(dev->dev, "unref fifo full!\n");
-			drm_framebuffer_unreference(tilcdc_crtc->scanout[n]);
-		}
+		drm_flip_work_queue(&tilcdc_crtc->unref_work, tilcdc_crtc->scanout[n]);
+		drm_flip_work_commit(&tilcdc_crtc->unref_work, priv->wq);
 	}
 	tilcdc_crtc->scanout[n] = crtc->fb;
 	drm_framebuffer_reference(tilcdc_crtc->scanout[n]);
@@ -149,14 +141,15 @@
 	WARN_ON(tilcdc_crtc->dpms == DRM_MODE_DPMS_ON);
 
 	drm_crtc_cleanup(crtc);
-	WARN_ON(!kfifo_is_empty(&tilcdc_crtc->unref_fifo));
-	kfifo_free(&tilcdc_crtc->unref_fifo);
+	drm_flip_work_cleanup(&tilcdc_crtc->unref_work);
+
 	kfree(tilcdc_crtc);
 }
 
 static int tilcdc_crtc_page_flip(struct drm_crtc *crtc,
 		struct drm_framebuffer *fb,
-		struct drm_pending_vblank_event *event)
+		struct drm_pending_vblank_event *event,
+		uint32_t page_flip_flags)
 {
 	struct tilcdc_crtc *tilcdc_crtc = to_tilcdc_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
@@ -379,7 +372,12 @@
 	else
 		tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_SYNC_EDGE);
 
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+	/*
+	 * use value from adjusted_mode here as this might have been
+	 * changed as part of the fixup for slave encoders to solve the
+	 * issue where tilcdc timings are not VESA compliant
+	 */
+	if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
 		tilcdc_set(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_HSYNC);
 	else
 		tilcdc_clear(dev, LCDC_RASTER_TIMING_2_REG, LCDC_INVERT_HSYNC);
@@ -666,14 +664,13 @@
 	tilcdc_crtc->dpms = DRM_MODE_DPMS_OFF;
 	init_waitqueue_head(&tilcdc_crtc->frame_done_wq);
 
-	ret = kfifo_alloc(&tilcdc_crtc->unref_fifo, 16, GFP_KERNEL);
+	ret = drm_flip_work_init(&tilcdc_crtc->unref_work, 16,
+			"unref", unref_worker);
 	if (ret) {
 		dev_err(dev->dev, "could not allocate unref FIFO\n");
 		goto fail;
 	}
 
-	INIT_WORK(&tilcdc_crtc->work, unref_worker);
-
 	ret = drm_crtc_init(dev, crtc, &tilcdc_crtc_funcs);
 	if (ret < 0)
 		goto fail;
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 40b71da..116da19 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -497,7 +497,6 @@
 #endif
 	.poll               = drm_poll,
 	.read               = drm_read,
-	.fasync             = drm_fasync,
 	.llseek             = no_llseek,
 	.mmap               = drm_gem_cma_mmap,
 };
@@ -519,7 +518,7 @@
 	.gem_vm_ops         = &drm_gem_cma_vm_ops,
 	.dumb_create        = drm_gem_cma_dumb_create,
 	.dumb_map_offset    = drm_gem_cma_dumb_map_offset,
-	.dumb_destroy       = drm_gem_cma_dumb_destroy,
+	.dumb_destroy       = drm_gem_dumb_destroy,
 #ifdef CONFIG_DEBUG_FS
 	.debugfs_init       = tilcdc_debugfs_init,
 	.debugfs_cleanup    = tilcdc_debugfs_cleanup,
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_slave.c b/drivers/gpu/drm/tilcdc/tilcdc_slave.c
index dfffaf0..23b3203 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_slave.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_slave.c
@@ -73,13 +73,38 @@
 	tilcdc_crtc_set_panel_info(encoder->crtc, &slave_info);
 }
 
+static bool slave_encoder_fixup(struct drm_encoder *encoder,
+		const struct drm_display_mode *mode,
+		struct drm_display_mode *adjusted_mode)
+{
+	/*
+	 * tilcdc does not generate VESA-complient sync but aligns
+	 * VS on the second edge of HS instead of first edge.
+	 * We use adjusted_mode, to fixup sync by aligning both rising
+	 * edges and add HSKEW offset to let the slave encoder fix it up.
+	 */
+	adjusted_mode->hskew = mode->hsync_end - mode->hsync_start;
+	adjusted_mode->flags |= DRM_MODE_FLAG_HSKEW;
+
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC) {
+		adjusted_mode->flags |= DRM_MODE_FLAG_PHSYNC;
+		adjusted_mode->flags &= ~DRM_MODE_FLAG_NHSYNC;
+	} else {
+		adjusted_mode->flags |= DRM_MODE_FLAG_NHSYNC;
+		adjusted_mode->flags &= ~DRM_MODE_FLAG_PHSYNC;
+	}
+
+	return drm_i2c_encoder_mode_fixup(encoder, mode, adjusted_mode);
+}
+
+
 static const struct drm_encoder_funcs slave_encoder_funcs = {
 		.destroy        = slave_encoder_destroy,
 };
 
 static const struct drm_encoder_helper_funcs slave_encoder_helper_funcs = {
 		.dpms           = drm_i2c_encoder_dpms,
-		.mode_fixup     = drm_i2c_encoder_mode_fixup,
+		.mode_fixup     = slave_encoder_fixup,
 		.prepare        = slave_encoder_prepare,
 		.commit         = drm_i2c_encoder_commit,
 		.mode_set       = drm_i2c_encoder_mode_set,
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index cb9dd67..f1a857e 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -45,7 +45,6 @@
 #define TTM_DEBUG(fmt, arg...)
 #define TTM_BO_HASH_ORDER 13
 
-static int ttm_bo_setup_vm(struct ttm_buffer_object *bo);
 static int ttm_bo_swapout(struct ttm_mem_shrink *shrink);
 static void ttm_bo_global_kobj_release(struct kobject *kobj);
 
@@ -615,13 +614,7 @@
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_mem_type_manager *man = &bdev->man[bo->mem.mem_type];
 
-	write_lock(&bdev->vm_lock);
-	if (likely(bo->vm_node != NULL)) {
-		rb_erase(&bo->vm_rb, &bdev->addr_space_rb);
-		drm_mm_put_block(bo->vm_node);
-		bo->vm_node = NULL;
-	}
-	write_unlock(&bdev->vm_lock);
+	drm_vma_offset_remove(&bdev->vma_manager, &bo->vma_node);
 	ttm_mem_io_lock(man, false);
 	ttm_mem_io_free_vm(bo);
 	ttm_mem_io_unlock(man);
@@ -1129,6 +1122,7 @@
 	bo->resv = &bo->ttm_resv;
 	reservation_object_init(bo->resv);
 	atomic_inc(&bo->glob->bo_count);
+	drm_vma_node_reset(&bo->vma_node);
 
 	ret = ttm_bo_check_placement(bo, placement);
 
@@ -1139,7 +1133,8 @@
 	if (likely(!ret) &&
 	    (bo->type == ttm_bo_type_device ||
 	     bo->type == ttm_bo_type_sg))
-		ret = ttm_bo_setup_vm(bo);
+		ret = drm_vma_offset_add(&bdev->vma_manager, &bo->vma_node,
+					 bo->mem.num_pages);
 
 	locked = ww_mutex_trylock(&bo->resv->lock);
 	WARN_ON(!locked);
@@ -1424,10 +1419,7 @@
 		TTM_DEBUG("Swap list was clean\n");
 	spin_unlock(&glob->lru_lock);
 
-	BUG_ON(!drm_mm_clean(&bdev->addr_space_mm));
-	write_lock(&bdev->vm_lock);
-	drm_mm_takedown(&bdev->addr_space_mm);
-	write_unlock(&bdev->vm_lock);
+	drm_vma_offset_manager_destroy(&bdev->vma_manager);
 
 	return ret;
 }
@@ -1441,7 +1433,6 @@
 {
 	int ret = -EINVAL;
 
-	rwlock_init(&bdev->vm_lock);
 	bdev->driver = driver;
 
 	memset(bdev->man, 0, sizeof(bdev->man));
@@ -1454,9 +1445,8 @@
 	if (unlikely(ret != 0))
 		goto out_no_sys;
 
-	bdev->addr_space_rb = RB_ROOT;
-	drm_mm_init(&bdev->addr_space_mm, file_page_offset, 0x10000000);
-
+	drm_vma_offset_manager_init(&bdev->vma_manager, file_page_offset,
+				    0x10000000);
 	INIT_DELAYED_WORK(&bdev->wq, ttm_bo_delayed_workqueue);
 	INIT_LIST_HEAD(&bdev->ddestroy);
 	bdev->dev_mapping = NULL;
@@ -1498,12 +1488,8 @@
 void ttm_bo_unmap_virtual_locked(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
-	loff_t offset = (loff_t) bo->addr_space_offset;
-	loff_t holelen = ((loff_t) bo->mem.num_pages) << PAGE_SHIFT;
 
-	if (!bdev->dev_mapping)
-		return;
-	unmap_mapping_range(bdev->dev_mapping, offset, holelen, 1);
+	drm_vma_node_unmap(&bo->vma_node, bdev->dev_mapping);
 	ttm_mem_io_free_vm(bo);
 }
 
@@ -1520,78 +1506,6 @@
 
 EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
-static void ttm_bo_vm_insert_rb(struct ttm_buffer_object *bo)
-{
-	struct ttm_bo_device *bdev = bo->bdev;
-	struct rb_node **cur = &bdev->addr_space_rb.rb_node;
-	struct rb_node *parent = NULL;
-	struct ttm_buffer_object *cur_bo;
-	unsigned long offset = bo->vm_node->start;
-	unsigned long cur_offset;
-
-	while (*cur) {
-		parent = *cur;
-		cur_bo = rb_entry(parent, struct ttm_buffer_object, vm_rb);
-		cur_offset = cur_bo->vm_node->start;
-		if (offset < cur_offset)
-			cur = &parent->rb_left;
-		else if (offset > cur_offset)
-			cur = &parent->rb_right;
-		else
-			BUG();
-	}
-
-	rb_link_node(&bo->vm_rb, parent, cur);
-	rb_insert_color(&bo->vm_rb, &bdev->addr_space_rb);
-}
-
-/**
- * ttm_bo_setup_vm:
- *
- * @bo: the buffer to allocate address space for
- *
- * Allocate address space in the drm device so that applications
- * can mmap the buffer and access the contents. This only
- * applies to ttm_bo_type_device objects as others are not
- * placed in the drm device address space.
- */
-
-static int ttm_bo_setup_vm(struct ttm_buffer_object *bo)
-{
-	struct ttm_bo_device *bdev = bo->bdev;
-	int ret;
-
-retry_pre_get:
-	ret = drm_mm_pre_get(&bdev->addr_space_mm);
-	if (unlikely(ret != 0))
-		return ret;
-
-	write_lock(&bdev->vm_lock);
-	bo->vm_node = drm_mm_search_free(&bdev->addr_space_mm,
-					 bo->mem.num_pages, 0, 0);
-
-	if (unlikely(bo->vm_node == NULL)) {
-		ret = -ENOMEM;
-		goto out_unlock;
-	}
-
-	bo->vm_node = drm_mm_get_block_atomic(bo->vm_node,
-					      bo->mem.num_pages, 0);
-
-	if (unlikely(bo->vm_node == NULL)) {
-		write_unlock(&bdev->vm_lock);
-		goto retry_pre_get;
-	}
-
-	ttm_bo_vm_insert_rb(bo);
-	write_unlock(&bdev->vm_lock);
-	bo->addr_space_offset = ((uint64_t) bo->vm_node->start) << PAGE_SHIFT;
-
-	return 0;
-out_unlock:
-	write_unlock(&bdev->vm_lock);
-	return ret;
-}
 
 int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool lazy, bool interruptible, bool no_wait)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index e4367f9..c58eba33 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -61,28 +61,25 @@
 	lpfn = placement->lpfn;
 	if (!lpfn)
 		lpfn = man->size;
-	do {
-		ret = drm_mm_pre_get(mm);
-		if (unlikely(ret))
-			return ret;
 
-		spin_lock(&rman->lock);
-		node = drm_mm_search_free_in_range(mm,
-					mem->num_pages, mem->page_alignment,
-					placement->fpfn, lpfn, 1);
-		if (unlikely(node == NULL)) {
-			spin_unlock(&rman->lock);
-			return 0;
-		}
-		node = drm_mm_get_block_atomic_range(node, mem->num_pages,
-						     mem->page_alignment,
-						     placement->fpfn,
-						     lpfn);
-		spin_unlock(&rman->lock);
-	} while (node == NULL);
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return -ENOMEM;
 
-	mem->mm_node = node;
-	mem->start = node->start;
+	spin_lock(&rman->lock);
+	ret = drm_mm_insert_node_in_range(mm, node, mem->num_pages,
+					  mem->page_alignment,
+					  placement->fpfn, lpfn,
+					  DRM_MM_SEARCH_BEST);
+	spin_unlock(&rman->lock);
+
+	if (unlikely(ret)) {
+		kfree(node);
+	} else {
+		mem->mm_node = node;
+		mem->start = node->start;
+	}
+
 	return 0;
 }
 
@@ -93,8 +90,10 @@
 
 	if (mem->mm_node) {
 		spin_lock(&rman->lock);
-		drm_mm_put_block(mem->mm_node);
+		drm_mm_remove_node(mem->mm_node);
 		spin_unlock(&rman->lock);
+
+		kfree(mem->mm_node);
 		mem->mm_node = NULL;
 	}
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 319cf41..7cc904d 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -30,6 +30,7 @@
 
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_placement.h>
+#include <drm/drm_vma_manager.h>
 #include <linux/io.h>
 #include <linux/highmem.h>
 #include <linux/wait.h>
@@ -450,7 +451,7 @@
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);
 	INIT_LIST_HEAD(&fbo->io_reserve_lru);
-	fbo->vm_node = NULL;
+	drm_vma_node_reset(&fbo->vma_node);
 	atomic_set(&fbo->cpu_writers, 0);
 
 	spin_lock(&bdev->fence_lock);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 3df9f16..1006c15 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -33,6 +33,7 @@
 #include <ttm/ttm_module.h>
 #include <ttm/ttm_bo_driver.h>
 #include <ttm/ttm_placement.h>
+#include <drm/drm_vma_manager.h>
 #include <linux/mm.h>
 #include <linux/rbtree.h>
 #include <linux/module.h>
@@ -40,37 +41,6 @@
 
 #define TTM_BO_VM_NUM_PREFAULT 16
 
-static struct ttm_buffer_object *ttm_bo_vm_lookup_rb(struct ttm_bo_device *bdev,
-						     unsigned long page_start,
-						     unsigned long num_pages)
-{
-	struct rb_node *cur = bdev->addr_space_rb.rb_node;
-	unsigned long cur_offset;
-	struct ttm_buffer_object *bo;
-	struct ttm_buffer_object *best_bo = NULL;
-
-	while (likely(cur != NULL)) {
-		bo = rb_entry(cur, struct ttm_buffer_object, vm_rb);
-		cur_offset = bo->vm_node->start;
-		if (page_start >= cur_offset) {
-			cur = cur->rb_right;
-			best_bo = bo;
-			if (page_start == cur_offset)
-				break;
-		} else
-			cur = cur->rb_left;
-	}
-
-	if (unlikely(best_bo == NULL))
-		return NULL;
-
-	if (unlikely((best_bo->vm_node->start + best_bo->num_pages) <
-		     (page_start + num_pages)))
-		return NULL;
-
-	return best_bo;
-}
-
 static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
@@ -146,9 +116,9 @@
 	}
 
 	page_offset = ((address - vma->vm_start) >> PAGE_SHIFT) +
-	    bo->vm_node->start - vma->vm_pgoff;
+	    drm_vma_node_start(&bo->vma_node) - vma->vm_pgoff;
 	page_last = vma_pages(vma) +
-	    bo->vm_node->start - vma->vm_pgoff;
+	    drm_vma_node_start(&bo->vma_node) - vma->vm_pgoff;
 
 	if (unlikely(page_offset >= bo->num_pages)) {
 		retval = VM_FAULT_SIGBUS;
@@ -249,6 +219,30 @@
 	.close = ttm_bo_vm_close
 };
 
+static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
+						  unsigned long offset,
+						  unsigned long pages)
+{
+	struct drm_vma_offset_node *node;
+	struct ttm_buffer_object *bo = NULL;
+
+	drm_vma_offset_lock_lookup(&bdev->vma_manager);
+
+	node = drm_vma_offset_lookup_locked(&bdev->vma_manager, offset, pages);
+	if (likely(node)) {
+		bo = container_of(node, struct ttm_buffer_object, vma_node);
+		if (!kref_get_unless_zero(&bo->kref))
+			bo = NULL;
+	}
+
+	drm_vma_offset_unlock_lookup(&bdev->vma_manager);
+
+	if (!bo)
+		pr_err("Could not find buffer object to map\n");
+
+	return bo;
+}
+
 int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 		struct ttm_bo_device *bdev)
 {
@@ -256,17 +250,9 @@
 	struct ttm_buffer_object *bo;
 	int ret;
 
-	read_lock(&bdev->vm_lock);
-	bo = ttm_bo_vm_lookup_rb(bdev, vma->vm_pgoff,
-				 vma_pages(vma));
-	if (likely(bo != NULL) && !kref_get_unless_zero(&bo->kref))
-		bo = NULL;
-	read_unlock(&bdev->vm_lock);
-
-	if (unlikely(bo == NULL)) {
-		pr_err("Could not find buffer object to map\n");
+	bo = ttm_bo_vm_lookup(bdev, vma->vm_pgoff, vma_pages(vma));
+	if (unlikely(!bo))
 		return -EINVAL;
-	}
 
 	driver = bo->bdev->driver;
 	if (unlikely(!driver->verify_access)) {
@@ -304,162 +290,3 @@
 	return 0;
 }
 EXPORT_SYMBOL(ttm_fbdev_mmap);
-
-
-ssize_t ttm_bo_io(struct ttm_bo_device *bdev, struct file *filp,
-		  const char __user *wbuf, char __user *rbuf, size_t count,
-		  loff_t *f_pos, bool write)
-{
-	struct ttm_buffer_object *bo;
-	struct ttm_bo_driver *driver;
-	struct ttm_bo_kmap_obj map;
-	unsigned long dev_offset = (*f_pos >> PAGE_SHIFT);
-	unsigned long kmap_offset;
-	unsigned long kmap_end;
-	unsigned long kmap_num;
-	size_t io_size;
-	unsigned int page_offset;
-	char *virtual;
-	int ret;
-	bool no_wait = false;
-	bool dummy;
-
-	read_lock(&bdev->vm_lock);
-	bo = ttm_bo_vm_lookup_rb(bdev, dev_offset, 1);
-	if (likely(bo != NULL))
-		ttm_bo_reference(bo);
-	read_unlock(&bdev->vm_lock);
-
-	if (unlikely(bo == NULL))
-		return -EFAULT;
-
-	driver = bo->bdev->driver;
-	if (unlikely(!driver->verify_access)) {
-		ret = -EPERM;
-		goto out_unref;
-	}
-
-	ret = driver->verify_access(bo, filp);
-	if (unlikely(ret != 0))
-		goto out_unref;
-
-	kmap_offset = dev_offset - bo->vm_node->start;
-	if (unlikely(kmap_offset >= bo->num_pages)) {
-		ret = -EFBIG;
-		goto out_unref;
-	}
-
-	page_offset = *f_pos & ~PAGE_MASK;
-	io_size = bo->num_pages - kmap_offset;
-	io_size = (io_size << PAGE_SHIFT) - page_offset;
-	if (count < io_size)
-		io_size = count;
-
-	kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT;
-	kmap_num = kmap_end - kmap_offset + 1;
-
-	ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
-
-	switch (ret) {
-	case 0:
-		break;
-	case -EBUSY:
-		ret = -EAGAIN;
-		goto out_unref;
-	default:
-		goto out_unref;
-	}
-
-	ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map);
-	if (unlikely(ret != 0)) {
-		ttm_bo_unreserve(bo);
-		goto out_unref;
-	}
-
-	virtual = ttm_kmap_obj_virtual(&map, &dummy);
-	virtual += page_offset;
-
-	if (write)
-		ret = copy_from_user(virtual, wbuf, io_size);
-	else
-		ret = copy_to_user(rbuf, virtual, io_size);
-
-	ttm_bo_kunmap(&map);
-	ttm_bo_unreserve(bo);
-	ttm_bo_unref(&bo);
-
-	if (unlikely(ret != 0))
-		return -EFBIG;
-
-	*f_pos += io_size;
-
-	return io_size;
-out_unref:
-	ttm_bo_unref(&bo);
-	return ret;
-}
-
-ssize_t ttm_bo_fbdev_io(struct ttm_buffer_object *bo, const char __user *wbuf,
-			char __user *rbuf, size_t count, loff_t *f_pos,
-			bool write)
-{
-	struct ttm_bo_kmap_obj map;
-	unsigned long kmap_offset;
-	unsigned long kmap_end;
-	unsigned long kmap_num;
-	size_t io_size;
-	unsigned int page_offset;
-	char *virtual;
-	int ret;
-	bool no_wait = false;
-	bool dummy;
-
-	kmap_offset = (*f_pos >> PAGE_SHIFT);
-	if (unlikely(kmap_offset >= bo->num_pages))
-		return -EFBIG;
-
-	page_offset = *f_pos & ~PAGE_MASK;
-	io_size = bo->num_pages - kmap_offset;
-	io_size = (io_size << PAGE_SHIFT) - page_offset;
-	if (count < io_size)
-		io_size = count;
-
-	kmap_end = (*f_pos + count - 1) >> PAGE_SHIFT;
-	kmap_num = kmap_end - kmap_offset + 1;
-
-	ret = ttm_bo_reserve(bo, true, no_wait, false, 0);
-
-	switch (ret) {
-	case 0:
-		break;
-	case -EBUSY:
-		return -EAGAIN;
-	default:
-		return ret;
-	}
-
-	ret = ttm_bo_kmap(bo, kmap_offset, kmap_num, &map);
-	if (unlikely(ret != 0)) {
-		ttm_bo_unreserve(bo);
-		return ret;
-	}
-
-	virtual = ttm_kmap_obj_virtual(&map, &dummy);
-	virtual += page_offset;
-
-	if (write)
-		ret = copy_from_user(virtual, wbuf, io_size);
-	else
-		ret = copy_to_user(rbuf, virtual, io_size);
-
-	ttm_bo_kunmap(&map);
-	ttm_bo_unreserve(bo);
-	ttm_bo_unref(&bo);
-
-	if (unlikely(ret != 0))
-		return ret;
-
-	*f_pos += io_size;
-
-	return io_size;
-}
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index c0770db..7650dc0 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -65,7 +65,6 @@
 	.read = drm_read,
 	.unlocked_ioctl	= drm_ioctl,
 	.release = drm_release,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -84,7 +83,7 @@
 
 	.dumb_create = udl_dumb_create,
 	.dumb_map_offset = udl_gem_mmap,
-	.dumb_destroy = udl_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 	.fops = &udl_driver_fops,
 
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index cc6d90f..56aec94 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -114,8 +114,6 @@
 		    struct drm_mode_create_dumb *args);
 int udl_gem_mmap(struct drm_file *file_priv, struct drm_device *dev,
 		 uint32_t handle, uint64_t *offset);
-int udl_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
-		     uint32_t handle);
 
 int udl_gem_init_object(struct drm_gem_object *obj);
 void udl_gem_free_object(struct drm_gem_object *gem_obj);
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index ef034fa..8dbe9d0 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c
@@ -66,12 +66,6 @@
 			      args->size, &args->handle);
 }
 
-int udl_dumb_destroy(struct drm_file *file, struct drm_device *dev,
-		     uint32_t handle)
-{
-	return drm_gem_handle_delete(file, handle);
-}
-
 int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	int ret;
@@ -123,55 +117,23 @@
 
 static int udl_gem_get_pages(struct udl_gem_object *obj, gfp_t gfpmask)
 {
-	int page_count, i;
-	struct page *page;
-	struct inode *inode;
-	struct address_space *mapping;
+	struct page **pages;
 
 	if (obj->pages)
 		return 0;
 
-	page_count = obj->base.size / PAGE_SIZE;
-	BUG_ON(obj->pages != NULL);
-	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
-	if (obj->pages == NULL)
-		return -ENOMEM;
+	pages = drm_gem_get_pages(&obj->base, gfpmask);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
 
-	inode = file_inode(obj->base.filp);
-	mapping = inode->i_mapping;
-	gfpmask |= mapping_gfp_mask(mapping);
-
-	for (i = 0; i < page_count; i++) {
-		page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
-		if (IS_ERR(page))
-			goto err_pages;
-		obj->pages[i] = page;
-	}
+	obj->pages = pages;
 
 	return 0;
-err_pages:
-	while (i--)
-		page_cache_release(obj->pages[i]);
-	drm_free_large(obj->pages);
-	obj->pages = NULL;
-	return PTR_ERR(page);
 }
 
 static void udl_gem_put_pages(struct udl_gem_object *obj)
 {
-	int page_count = obj->base.size / PAGE_SIZE;
-	int i;
-
-	if (obj->base.import_attach) {
-		drm_free_large(obj->pages);
-		obj->pages = NULL;
-		return;
-	}
-
-	for (i = 0; i < page_count; i++)
-		page_cache_release(obj->pages[i]);
-
-	drm_free_large(obj->pages);
+	drm_gem_put_pages(&obj->base, obj->pages, false, false);
 	obj->pages = NULL;
 }
 
@@ -223,8 +185,7 @@
 	if (obj->pages)
 		udl_gem_put_pages(obj);
 
-	if (gem_obj->map_list.map)
-		drm_gem_free_mmap_offset(gem_obj);
+	drm_gem_free_mmap_offset(gem_obj);
 }
 
 /* the dumb interface doesn't work with the GEM straight MMAP
@@ -247,13 +208,11 @@
 	ret = udl_gem_get_pages(gobj, GFP_KERNEL);
 	if (ret)
 		goto out;
-	if (!gobj->base.map_list.map) {
-		ret = drm_gem_create_mmap_offset(obj);
-		if (ret)
-			goto out;
-	}
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret)
+		goto out;
 
-	*offset = (u64)gobj->base.map_list.hash.key << PAGE_SHIFT;
+	*offset = drm_vma_node_offset_addr(&gobj->base.vma_node);
 
 out:
 	drm_gem_object_unreference(&gobj->base);
diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index 0ce2d71..f5ae574 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -41,8 +41,8 @@
 	total_len = usb_get_descriptor(usbdev, 0x5f, /* vendor specific */
 				    0, desc, MAX_VENDOR_DESCRIPTOR_SIZE);
 	if (total_len > 5) {
-		DRM_INFO("vendor descriptor length:%x data:%*ph\n",
-			total_len, 11, desc);
+		DRM_INFO("vendor descriptor length:%x data:%11ph\n",
+			total_len, desc);
 
 		if ((desc[0] != total_len) || /* descriptor length */
 		    (desc[1] != 0x5f) ||   /* vendor descriptor type */
diff --git a/drivers/gpu/drm/via/via_dma.c b/drivers/gpu/drm/via/via_dma.c
index 13558f5..652f9b4 100644
--- a/drivers/gpu/drm/via/via_dma.c
+++ b/drivers/gpu/drm/via/via_dma.c
@@ -720,7 +720,7 @@
 	return ret;
 }
 
-struct drm_ioctl_desc via_ioctls[] = {
+const struct drm_ioctl_desc via_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(VIA_ALLOCMEM, via_mem_alloc, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(VIA_FREEMEM, via_mem_free, DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(VIA_AGP_INIT, via_agp_init, DRM_AUTH|DRM_MASTER),
diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c
index f4ae203..92684a9 100644
--- a/drivers/gpu/drm/via/via_drv.c
+++ b/drivers/gpu/drm/via/via_drv.c
@@ -64,7 +64,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -73,7 +72,7 @@
 
 static struct drm_driver driver = {
 	.driver_features =
-	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_HAVE_IRQ |
+	    DRIVER_USE_AGP | DRIVER_HAVE_IRQ |
 	    DRIVER_IRQ_SHARED,
 	.load = via_driver_load,
 	.unload = via_driver_unload,
diff --git a/drivers/gpu/drm/via/via_drv.h b/drivers/gpu/drm/via/via_drv.h
index 893a650..a811ef2 100644
--- a/drivers/gpu/drm/via/via_drv.h
+++ b/drivers/gpu/drm/via/via_drv.h
@@ -114,7 +114,7 @@
 #define VIA_READ8(reg)		DRM_READ8(VIA_BASE, reg)
 #define VIA_WRITE8(reg, val)	DRM_WRITE8(VIA_BASE, reg, val)
 
-extern struct drm_ioctl_desc via_ioctls[];
+extern const struct drm_ioctl_desc via_ioctls[];
 extern int via_max_ioctl;
 
 extern int via_fb_init(struct drm_device *dev, void *data, struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c
index 0ab93ff..7e3ad87 100644
--- a/drivers/gpu/drm/via/via_mm.c
+++ b/drivers/gpu/drm/via/via_mm.c
@@ -140,11 +140,11 @@
 	if (mem->type == VIA_MEM_AGP)
 		retval = drm_mm_insert_node(&dev_priv->agp_mm,
 					    &item->mm_node,
-					    tmpSize, 0);
+					    tmpSize, 0, DRM_MM_SEARCH_DEFAULT);
 	else
 		retval = drm_mm_insert_node(&dev_priv->vram_mm,
 					    &item->mm_node,
-					    tmpSize, 0);
+					    tmpSize, 0, DRM_MM_SEARCH_DEFAULT);
 	if (retval)
 		goto fail_alloc;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 78e2164..1a90f0a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -124,7 +124,7 @@
  * Ioctl definitions.
  */
 
-static struct drm_ioctl_desc vmw_ioctls[] = {
+static const struct drm_ioctl_desc vmw_ioctls[] = {
 	VMW_IOCTL_DEF(VMW_GET_PARAM, vmw_getparam_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_ALLOC_DMABUF, vmw_dmabuf_alloc_ioctl,
@@ -622,8 +622,10 @@
 	}
 
 	dev_priv->fman = vmw_fence_manager_init(dev_priv);
-	if (unlikely(dev_priv->fman == NULL))
+	if (unlikely(dev_priv->fman == NULL)) {
+		ret = -ENOMEM;
 		goto out_no_fman;
+	}
 
 	vmw_kms_save_vga(dev_priv);
 
@@ -782,7 +784,7 @@
 
 	if ((nr >= DRM_COMMAND_BASE) && (nr < DRM_COMMAND_END)
 	    && (nr < DRM_COMMAND_BASE + dev->driver->num_ioctls)) {
-		struct drm_ioctl_desc *ioctl =
+		const struct drm_ioctl_desc *ioctl =
 		    &vmw_ioctls[nr - DRM_COMMAND_BASE];
 
 		if (unlikely(ioctl->cmd_drv != cmd)) {
@@ -795,29 +797,12 @@
 	return drm_ioctl(filp, cmd, arg);
 }
 
-static int vmw_firstopen(struct drm_device *dev)
-{
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	dev_priv->is_opened = true;
-
-	return 0;
-}
-
 static void vmw_lastclose(struct drm_device *dev)
 {
-	struct vmw_private *dev_priv = vmw_priv(dev);
 	struct drm_crtc *crtc;
 	struct drm_mode_set set;
 	int ret;
 
-	/**
-	 * Do nothing on the lastclose call from drm_unload.
-	 */
-
-	if (!dev_priv->is_opened)
-		return;
-
-	dev_priv->is_opened = false;
 	set.x = 0;
 	set.y = 0;
 	set.fb = NULL;
@@ -1120,7 +1105,6 @@
 	.mmap = vmw_mmap,
 	.poll = vmw_fops_poll,
 	.read = vmw_fops_read,
-	.fasync = drm_fasync,
 #if defined(CONFIG_COMPAT)
 	.compat_ioctl = drm_compat_ioctl,
 #endif
@@ -1132,7 +1116,6 @@
 	DRIVER_MODESET,
 	.load = vmw_driver_load,
 	.unload = vmw_driver_unload,
-	.firstopen = vmw_firstopen,
 	.lastclose = vmw_lastclose,
 	.irq_preinstall = vmw_irq_preinstall,
 	.irq_postinstall = vmw_irq_postinstall,
@@ -1143,7 +1126,6 @@
 	.disable_vblank = vmw_disable_vblank,
 	.ioctls = vmw_ioctls,
 	.num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls),
-	.dma_quiescent = NULL,	/*vmw_dma_quiescent, */
 	.master_create = vmw_master_create,
 	.master_destroy = vmw_master_destroy,
 	.master_set = vmw_master_set,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 13aeda7..150ec64 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -324,7 +324,6 @@
 	 */
 
 	bool stealth;
-	bool is_opened;
 	bool enable_fb;
 
 	/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
index 3751730..1a0bf07 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
@@ -29,7 +29,9 @@
 #include <drm/drmP.h>
 #include <drm/ttm/ttm_bo_driver.h>
 
-#define VMW_PPN_SIZE sizeof(unsigned long)
+#define VMW_PPN_SIZE (sizeof(unsigned long))
+/* A future safe maximum remap size. */
+#define VMW_PPN_PER_REMAP ((31 * 1024) / VMW_PPN_SIZE)
 
 static int vmw_gmr2_bind(struct vmw_private *dev_priv,
 			 struct page *pages[],
@@ -38,43 +40,61 @@
 {
 	SVGAFifoCmdDefineGMR2 define_cmd;
 	SVGAFifoCmdRemapGMR2 remap_cmd;
-	uint32_t define_size = sizeof(define_cmd) + 4;
-	uint32_t remap_size = VMW_PPN_SIZE * num_pages + sizeof(remap_cmd) + 4;
 	uint32_t *cmd;
 	uint32_t *cmd_orig;
+	uint32_t define_size = sizeof(define_cmd) + sizeof(*cmd);
+	uint32_t remap_num = num_pages / VMW_PPN_PER_REMAP + ((num_pages % VMW_PPN_PER_REMAP) > 0);
+	uint32_t remap_size = VMW_PPN_SIZE * num_pages + (sizeof(remap_cmd) + sizeof(*cmd)) * remap_num;
+	uint32_t remap_pos = 0;
+	uint32_t cmd_size = define_size + remap_size;
 	uint32_t i;
 
-	cmd_orig = cmd = vmw_fifo_reserve(dev_priv, define_size + remap_size);
+	cmd_orig = cmd = vmw_fifo_reserve(dev_priv, cmd_size);
 	if (unlikely(cmd == NULL))
 		return -ENOMEM;
 
 	define_cmd.gmrId = gmr_id;
 	define_cmd.numPages = num_pages;
 
+	*cmd++ = SVGA_CMD_DEFINE_GMR2;
+	memcpy(cmd, &define_cmd, sizeof(define_cmd));
+	cmd += sizeof(define_cmd) / sizeof(*cmd);
+
+	/*
+	 * Need to split the command if there are too many
+	 * pages that goes into the gmr.
+	 */
+
 	remap_cmd.gmrId = gmr_id;
 	remap_cmd.flags = (VMW_PPN_SIZE > sizeof(*cmd)) ?
 		SVGA_REMAP_GMR2_PPN64 : SVGA_REMAP_GMR2_PPN32;
-	remap_cmd.offsetPages = 0;
-	remap_cmd.numPages = num_pages;
 
-	*cmd++ = SVGA_CMD_DEFINE_GMR2;
-	memcpy(cmd, &define_cmd, sizeof(define_cmd));
-	cmd += sizeof(define_cmd) / sizeof(uint32);
+	while (num_pages > 0) {
+		unsigned long nr = min(num_pages, (unsigned long)VMW_PPN_PER_REMAP);
 
-	*cmd++ = SVGA_CMD_REMAP_GMR2;
-	memcpy(cmd, &remap_cmd, sizeof(remap_cmd));
-	cmd += sizeof(remap_cmd) / sizeof(uint32);
+		remap_cmd.offsetPages = remap_pos;
+		remap_cmd.numPages = nr;
 
-	for (i = 0; i < num_pages; ++i) {
-		if (VMW_PPN_SIZE <= 4)
-			*cmd = page_to_pfn(*pages++);
-		else
-			*((uint64_t *)cmd) = page_to_pfn(*pages++);
+		*cmd++ = SVGA_CMD_REMAP_GMR2;
+		memcpy(cmd, &remap_cmd, sizeof(remap_cmd));
+		cmd += sizeof(remap_cmd) / sizeof(*cmd);
 
-		cmd += VMW_PPN_SIZE / sizeof(*cmd);
+		for (i = 0; i < nr; ++i) {
+			if (VMW_PPN_SIZE <= 4)
+				*cmd = page_to_pfn(*pages++);
+			else
+				*((uint64_t *)cmd) = page_to_pfn(*pages++);
+
+			cmd += VMW_PPN_SIZE / sizeof(*cmd);
+		}
+
+		num_pages -= nr;
+		remap_pos += nr;
 	}
 
-	vmw_fifo_commit(dev_priv, define_size + remap_size);
+	BUG_ON(cmd != cmd_orig + cmd_size / sizeof(*cmd));
+
+	vmw_fifo_commit(dev_priv, cmd_size);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index d4607b2..fc43c06 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1706,7 +1706,8 @@
 
 int vmw_du_page_flip(struct drm_crtc *crtc,
 		     struct drm_framebuffer *fb,
-		     struct drm_pending_vblank_event *event)
+		     struct drm_pending_vblank_event *event,
+		     uint32_t page_flip_flags)
 {
 	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
 	struct drm_framebuffer *old_fb = crtc->fb;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 6fa89c9..8d038c3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -123,7 +123,8 @@
 void vmw_display_unit_cleanup(struct vmw_display_unit *du);
 int vmw_du_page_flip(struct drm_crtc *crtc,
 		     struct drm_framebuffer *fb,
-		     struct drm_pending_vblank_event *event);
+		     struct drm_pending_vblank_event *event,
+		     uint32_t page_flip_flags);
 void vmw_du_crtc_save(struct drm_crtc *crtc);
 void vmw_du_crtc_restore(struct drm_crtc *crtc);
 void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 7953d1f..0e67cf4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -500,7 +500,7 @@
 		goto out_no_dmabuf;
 
 	rep->handle = handle;
-	rep->map_handle = dma_buf->base.addr_space_offset;
+	rep->map_handle = drm_vma_node_offset_addr(&dma_buf->base.vma_node);
 	rep->cur_gmr_id = handle;
 	rep->cur_gmr_offset = 0;
 
@@ -834,7 +834,7 @@
 	if (ret != 0)
 		return -EINVAL;
 
-	*offset = out_buf->base.addr_space_offset;
+	*offset = drm_vma_node_offset_addr(&out_buf->base.vma_node);
 	vmw_dmabuf_unreference(&out_buf);
 	return 0;
 }
diff --git a/drivers/gpu/host1x/drm/dc.c b/drivers/gpu/host1x/drm/dc.c
index 5360e5a..b1a05ad 100644
--- a/drivers/gpu/host1x/drm/dc.c
+++ b/drivers/gpu/host1x/drm/dc.c
@@ -235,7 +235,7 @@
 }
 
 static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-			      struct drm_pending_vblank_event *event)
+			      struct drm_pending_vblank_event *event, uint32_t page_flip_flags)
 {
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	struct drm_device *drm = crtc->dev;
diff --git a/drivers/gpu/host1x/drm/drm.c b/drivers/gpu/host1x/drm/drm.c
index e184b00..8c61cee 100644
--- a/drivers/gpu/host1x/drm/drm.c
+++ b/drivers/gpu/host1x/drm/drm.c
@@ -356,7 +356,7 @@
 
 	bo = to_tegra_bo(gem);
 
-	args->offset = tegra_bo_get_mmap_offset(bo);
+	args->offset = drm_vma_node_offset_addr(&bo->gem.vma_node);
 
 	drm_gem_object_unreference(gem);
 
@@ -487,7 +487,7 @@
 }
 #endif
 
-static struct drm_ioctl_desc tegra_drm_ioctls[] = {
+static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
 #ifdef CONFIG_DRM_TEGRA_STAGING
 	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_UNLOCKED | DRM_AUTH),
 	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_UNLOCKED),
@@ -508,7 +508,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = tegra_drm_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 	.read = drm_read,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = drm_compat_ioctl,
@@ -633,7 +632,7 @@
 	.gem_vm_ops = &tegra_bo_vm_ops,
 	.dumb_create = tegra_bo_dumb_create,
 	.dumb_map_offset = tegra_bo_dumb_map_offset,
-	.dumb_destroy = tegra_bo_dumb_destroy,
+	.dumb_destroy = drm_gem_dumb_destroy,
 
 	.ioctls = tegra_drm_ioctls,
 	.num_ioctls = ARRAY_SIZE(tegra_drm_ioctls),
diff --git a/drivers/gpu/host1x/drm/gem.c b/drivers/gpu/host1x/drm/gem.c
index c5e9a9b..59623de 100644
--- a/drivers/gpu/host1x/drm/gem.c
+++ b/drivers/gpu/host1x/drm/gem.c
@@ -106,11 +106,6 @@
 	dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
 }
 
-unsigned int tegra_bo_get_mmap_offset(struct tegra_bo *bo)
-{
-	return (unsigned int)bo->gem.map_list.hash.key << PAGE_SHIFT;
-}
-
 struct tegra_bo *tegra_bo_create(struct drm_device *drm, unsigned int size)
 {
 	struct tegra_bo *bo;
@@ -182,8 +177,7 @@
 {
 	struct tegra_bo *bo = to_tegra_bo(gem);
 
-	if (gem->map_list.map)
-		drm_gem_free_mmap_offset(gem);
+	drm_gem_free_mmap_offset(gem);
 
 	drm_gem_object_release(gem);
 	tegra_bo_destroy(gem->dev, bo);
@@ -228,7 +222,7 @@
 
 	bo = to_tegra_bo(gem);
 
-	*offset = tegra_bo_get_mmap_offset(bo);
+	*offset = drm_vma_node_offset_addr(&bo->gem.vma_node);
 
 	drm_gem_object_unreference(gem);
 
@@ -262,9 +256,3 @@
 
 	return ret;
 }
-
-int tegra_bo_dumb_destroy(struct drm_file *file, struct drm_device *drm,
-			  unsigned int handle)
-{
-	return drm_gem_handle_delete(file, handle);
-}
diff --git a/drivers/gpu/host1x/drm/gem.h b/drivers/gpu/host1x/drm/gem.h
index 34de2b4..492533a 100644
--- a/drivers/gpu/host1x/drm/gem.h
+++ b/drivers/gpu/host1x/drm/gem.h
@@ -44,13 +44,10 @@
 					    unsigned int size,
 					    unsigned int *handle);
 void tegra_bo_free_object(struct drm_gem_object *gem);
-unsigned int tegra_bo_get_mmap_offset(struct tegra_bo *bo);
 int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
 			 struct drm_mode_create_dumb *args);
 int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
 			     uint32_t handle, uint64_t *offset);
-int tegra_bo_dumb_destroy(struct drm_file *file, struct drm_device *drm,
-			  unsigned int handle);
 
 int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma);
 
diff --git a/drivers/gpu/host1x/drm/hdmi.c b/drivers/gpu/host1x/drm/hdmi.c
index bf7f027..644d95c 100644
--- a/drivers/gpu/host1x/drm/hdmi.c
+++ b/drivers/gpu/host1x/drm/hdmi.c
@@ -551,24 +551,8 @@
 		return;
 	}
 
-	memset(&frame, 0, sizeof(frame));
-
-	frame.type = HDMI_INFOFRAME_TYPE_VENDOR;
-	frame.version = 0x01;
-	frame.length = 6;
-
-	frame.data[0] = 0x03; /* regid0 */
-	frame.data[1] = 0x0c; /* regid1 */
-	frame.data[2] = 0x00; /* regid2 */
-	frame.data[3] = 0x02 << 5; /* video format */
-
-	/* TODO: 74 MHz limit? */
-	if (1) {
-		frame.data[4] = 0x00 << 4; /* 3D structure */
-	} else {
-		frame.data[4] = 0x08 << 4; /* 3D structure */
-		frame.data[5] = 0x00 << 4; /* 3D ext. data */
-	}
+	hdmi_vendor_infoframe_init(&frame);
+	frame.s3d_struct = HDMI_3D_STRUCTURE_FRAME_PACKING;
 
 	err = hdmi_vendor_infoframe_pack(&frame, buffer, sizeof(buffer));
 	if (err < 0) {
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index cf787e1..ec0ae2d 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -27,6 +27,7 @@
 #include <linux/pci.h>
 #include <linux/console.h>
 #include <linux/vga_switcheroo.h>
+#include <linux/pm_runtime.h>
 
 #include <linux/vgaarb.h>
 
@@ -37,6 +38,7 @@
 	const struct vga_switcheroo_client_ops *ops;
 	int id;
 	bool active;
+	bool driver_power_control;
 	struct list_head list;
 };
 
@@ -132,7 +134,7 @@
 
 static int register_client(struct pci_dev *pdev,
 			   const struct vga_switcheroo_client_ops *ops,
-			   int id, bool active)
+			   int id, bool active, bool driver_power_control)
 {
 	struct vga_switcheroo_client *client;
 
@@ -145,6 +147,7 @@
 	client->ops = ops;
 	client->id = id;
 	client->active = active;
+	client->driver_power_control = driver_power_control;
 
 	mutex_lock(&vgasr_mutex);
 	list_add_tail(&client->list, &vgasr_priv.clients);
@@ -160,10 +163,11 @@
 }
 
 int vga_switcheroo_register_client(struct pci_dev *pdev,
-				   const struct vga_switcheroo_client_ops *ops)
+				   const struct vga_switcheroo_client_ops *ops,
+				   bool driver_power_control)
 {
 	return register_client(pdev, ops, -1,
-			       pdev == vga_default_device());
+			       pdev == vga_default_device(), driver_power_control);
 }
 EXPORT_SYMBOL(vga_switcheroo_register_client);
 
@@ -171,7 +175,7 @@
 					 const struct vga_switcheroo_client_ops *ops,
 					 int id, bool active)
 {
-	return register_client(pdev, ops, id | ID_BIT_AUDIO, active);
+	return register_client(pdev, ops, id | ID_BIT_AUDIO, active, false);
 }
 EXPORT_SYMBOL(vga_switcheroo_register_audio_client);
 
@@ -258,10 +262,11 @@
 	int i = 0;
 	mutex_lock(&vgasr_mutex);
 	list_for_each_entry(client, &vgasr_priv.clients, list) {
-		seq_printf(m, "%d:%s%s:%c:%s:%s\n", i,
+		seq_printf(m, "%d:%s%s:%c:%s%s:%s\n", i,
 			   client_id(client) == VGA_SWITCHEROO_DIS ? "DIS" : "IGD",
 			   client_is_vga(client) ? "" : "-Audio",
 			   client->active ? '+' : ' ',
+			   client->driver_power_control ? "Dyn" : "",
 			   client->pwr_state ? "Pwr" : "Off",
 			   pci_name(client->pdev));
 		i++;
@@ -277,6 +282,8 @@
 
 static int vga_switchon(struct vga_switcheroo_client *client)
 {
+	if (client->driver_power_control)
+		return 0;
 	if (vgasr_priv.handler->power_state)
 		vgasr_priv.handler->power_state(client->id, VGA_SWITCHEROO_ON);
 	/* call the driver callback to turn on device */
@@ -287,6 +294,8 @@
 
 static int vga_switchoff(struct vga_switcheroo_client *client)
 {
+	if (client->driver_power_control)
+		return 0;
 	/* call the driver callback to turn off device */
 	client->ops->set_gpu_state(client->pdev, VGA_SWITCHEROO_OFF);
 	if (vgasr_priv.handler->power_state)
@@ -402,6 +411,8 @@
 		list_for_each_entry(client, &vgasr_priv.clients, list) {
 			if (client->active || client_is_audio(client))
 				continue;
+			if (client->driver_power_control)
+				continue;
 			set_audio_state(client->id, VGA_SWITCHEROO_OFF);
 			if (client->pwr_state == VGA_SWITCHEROO_ON)
 				vga_switchoff(client);
@@ -413,6 +424,8 @@
 		list_for_each_entry(client, &vgasr_priv.clients, list) {
 			if (client->active || client_is_audio(client))
 				continue;
+			if (client->driver_power_control)
+				continue;
 			if (client->pwr_state == VGA_SWITCHEROO_OFF)
 				vga_switchon(client);
 			set_audio_state(client->id, VGA_SWITCHEROO_ON);
@@ -565,3 +578,127 @@
 	return err;
 }
 EXPORT_SYMBOL(vga_switcheroo_process_delayed_switch);
+
+static void vga_switcheroo_power_switch(struct pci_dev *pdev, enum vga_switcheroo_state state)
+{
+	struct vga_switcheroo_client *client;
+
+	if (!vgasr_priv.handler->power_state)
+		return;
+
+	client = find_client_from_pci(&vgasr_priv.clients, pdev);
+	if (!client)
+		return;
+
+	if (!client->driver_power_control)
+		return;
+
+	vgasr_priv.handler->power_state(client->id, state);
+}
+
+/* force a PCI device to a certain state - mainly to turn off audio clients */
+
+void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic)
+{
+	struct vga_switcheroo_client *client;
+
+	client = find_client_from_pci(&vgasr_priv.clients, pdev);
+	if (!client)
+		return;
+
+	if (!client->driver_power_control)
+		return;
+
+	client->pwr_state = dynamic;
+	set_audio_state(client->id, dynamic);
+}
+EXPORT_SYMBOL(vga_switcheroo_set_dynamic_switch);
+
+/* switcheroo power domain */
+static int vga_switcheroo_runtime_suspend(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	ret = dev->bus->pm->runtime_suspend(dev);
+	if (ret)
+		return ret;
+
+	vga_switcheroo_power_switch(pdev, VGA_SWITCHEROO_OFF);
+	return 0;
+}
+
+static int vga_switcheroo_runtime_resume(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+
+	vga_switcheroo_power_switch(pdev, VGA_SWITCHEROO_ON);
+	ret = dev->bus->pm->runtime_resume(dev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/* this version is for the case where the power switch is separate
+   to the device being powered down. */
+int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain)
+{
+	/* copy over all the bus versions */
+	if (dev->bus && dev->bus->pm) {
+		domain->ops = *dev->bus->pm;
+		domain->ops.runtime_suspend = vga_switcheroo_runtime_suspend;
+		domain->ops.runtime_resume = vga_switcheroo_runtime_resume;
+
+		dev->pm_domain = domain;
+		return 0;
+	}
+	dev->pm_domain = NULL;
+	return -EINVAL;
+}
+EXPORT_SYMBOL(vga_switcheroo_init_domain_pm_ops);
+
+static int vga_switcheroo_runtime_resume_hdmi_audio(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int ret;
+	struct vga_switcheroo_client *client, *found = NULL;
+
+	/* we need to check if we have to switch back on the video
+	   device so the audio device can come back */
+	list_for_each_entry(client, &vgasr_priv.clients, list) {
+		if (PCI_SLOT(client->pdev->devfn) == PCI_SLOT(pdev->devfn) && client_is_vga(client)) {
+			found = client;
+			ret = pm_runtime_get_sync(&client->pdev->dev);
+			if (ret) {
+				if (ret != 1)
+					return ret;
+			}
+			break;
+		}
+	}
+	ret = dev->bus->pm->runtime_resume(dev);
+
+	/* put the reference for the gpu */
+	if (found) {
+		pm_runtime_mark_last_busy(&found->pdev->dev);
+		pm_runtime_put_autosuspend(&found->pdev->dev);
+	}
+	return ret;
+}
+
+int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain)
+{
+	/* copy over all the bus versions */
+	if (dev->bus && dev->bus->pm) {
+		domain->ops = *dev->bus->pm;
+		domain->ops.runtime_resume = vga_switcheroo_runtime_resume_hdmi_audio;
+
+		dev->pm_domain = domain;
+		return 0;
+	}
+	dev->pm_domain = NULL;
+	return -EINVAL;
+}
+EXPORT_SYMBOL(vga_switcheroo_init_domain_pm_optimus_hdmi_audio);
diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c
index 5207591a..cd33084 100644
--- a/drivers/hid/hid-logitech-dj.c
+++ b/drivers/hid/hid-logitech-dj.c
@@ -192,6 +192,7 @@
 static int logi_dj_output_hidraw_report(struct hid_device *hid, u8 * buf,
 					size_t count,
 					unsigned char report_type);
+static int logi_dj_recv_query_paired_devices(struct dj_receiver_dev *djrcv_dev);
 
 static void logi_dj_recv_destroy_djhid_device(struct dj_receiver_dev *djrcv_dev,
 						struct dj_report *dj_report)
@@ -232,6 +233,7 @@
 	if (dj_report->report_params[DEVICE_PAIRED_PARAM_SPFUNCTION] &
 	    SPFUNCTION_DEVICE_LIST_EMPTY) {
 		dbg_hid("%s: device list is empty\n", __func__);
+		djrcv_dev->querying_devices = false;
 		return;
 	}
 
@@ -242,6 +244,12 @@
 		return;
 	}
 
+	if (djrcv_dev->paired_dj_devices[dj_report->device_index]) {
+		/* The device is already known. No need to reallocate it. */
+		dbg_hid("%s: device is already known\n", __func__);
+		return;
+	}
+
 	dj_hiddev = hid_allocate_device();
 	if (IS_ERR(dj_hiddev)) {
 		dev_err(&djrcv_hdev->dev, "%s: hid_allocate_device failed\n",
@@ -305,6 +313,7 @@
 	struct dj_report dj_report;
 	unsigned long flags;
 	int count;
+	int retval;
 
 	dbg_hid("%s\n", __func__);
 
@@ -337,6 +346,25 @@
 		logi_dj_recv_destroy_djhid_device(djrcv_dev, &dj_report);
 		break;
 	default:
+	/* A normal report (i. e. not belonging to a pair/unpair notification)
+	 * arriving here, means that the report arrived but we did not have a
+	 * paired dj_device associated to the report's device_index, this
+	 * means that the original "device paired" notification corresponding
+	 * to this dj_device never arrived to this driver. The reason is that
+	 * hid-core discards all packets coming from a device while probe() is
+	 * executing. */
+	if (!djrcv_dev->paired_dj_devices[dj_report.device_index]) {
+		/* ok, we don't know the device, just re-ask the
+		 * receiver for the list of connected devices. */
+		retval = logi_dj_recv_query_paired_devices(djrcv_dev);
+		if (!retval) {
+			/* everything went fine, so just leave */
+			break;
+		}
+		dev_err(&djrcv_dev->hdev->dev,
+			"%s:logi_dj_recv_query_paired_devices "
+			"error:%d\n", __func__, retval);
+		}
 		dbg_hid("%s: unexpected report type\n", __func__);
 	}
 }
@@ -367,6 +395,12 @@
 	if (!djdev) {
 		dbg_hid("djrcv_dev->paired_dj_devices[dj_report->device_index]"
 			" is NULL, index %d\n", dj_report->device_index);
+		kfifo_in(&djrcv_dev->notif_fifo, dj_report, sizeof(struct dj_report));
+
+		if (schedule_work(&djrcv_dev->work) == 0) {
+			dbg_hid("%s: did not schedule the work item, was already "
+			"queued\n", __func__);
+		}
 		return;
 	}
 
@@ -397,6 +431,12 @@
 	if (dj_device == NULL) {
 		dbg_hid("djrcv_dev->paired_dj_devices[dj_report->device_index]"
 			" is NULL, index %d\n", dj_report->device_index);
+		kfifo_in(&djrcv_dev->notif_fifo, dj_report, sizeof(struct dj_report));
+
+		if (schedule_work(&djrcv_dev->work) == 0) {
+			dbg_hid("%s: did not schedule the work item, was already "
+			"queued\n", __func__);
+		}
 		return;
 	}
 
@@ -444,6 +484,10 @@
 	struct dj_report *dj_report;
 	int retval;
 
+	/* no need to protect djrcv_dev->querying_devices */
+	if (djrcv_dev->querying_devices)
+		return 0;
+
 	dj_report = kzalloc(sizeof(struct dj_report), GFP_KERNEL);
 	if (!dj_report)
 		return -ENOMEM;
@@ -455,6 +499,7 @@
 	return retval;
 }
 
+
 static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev,
 					  unsigned timeout)
 {
diff --git a/drivers/hid/hid-logitech-dj.h b/drivers/hid/hid-logitech-dj.h
index fd28a5e..4a40003 100644
--- a/drivers/hid/hid-logitech-dj.h
+++ b/drivers/hid/hid-logitech-dj.h
@@ -101,6 +101,7 @@
 	struct work_struct work;
 	struct kfifo notif_fifo;
 	spinlock_t lock;
+	bool querying_devices;
 };
 
 struct dj_device {
diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c
index ecbc749..87fbe292 100644
--- a/drivers/hid/hid-sony.c
+++ b/drivers/hid/hid-sony.c
@@ -369,7 +369,8 @@
 	if (sc->quirks & PS3REMOTE)
 		return ps3remote_mapping(hdev, hi, field, usage, bit, max);
 
-	return -1;
+	/* Let hid-core decide for the others */
+	return 0;
 }
 
 /*
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index a745163..6f1feb2 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -518,7 +518,6 @@
 		goto out;
 	}
 
-	mutex_unlock(&minors_lock);
 	init_waitqueue_head(&dev->wait);
 	INIT_LIST_HEAD(&dev->list);
 
@@ -528,6 +527,7 @@
 	dev->exist = 1;
 	hid->hidraw = dev;
 
+	mutex_unlock(&minors_lock);
 out:
 	return result;
 
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 4c605c7..deb5c25 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -562,7 +562,7 @@
 				struct hv_hotadd_state *has)
 {
 	int ret = 0;
-	int i, nid, t;
+	int i, nid;
 	unsigned long start_pfn;
 	unsigned long processed_pfn;
 	unsigned long total_pfn = pfn_count;
@@ -607,14 +607,11 @@
 
 		/*
 		 * Wait for the memory block to be onlined.
+		 * Since the hot add has succeeded, it is ok to
+		 * proceed even if the pages in the hot added region
+		 * have not been "onlined" within the allowed time.
 		 */
-		t = wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
-		if (t == 0) {
-			pr_info("hot_add memory timedout\n");
-			has->ha_end_pfn -= HA_CHUNK;
-			has->covered_end_pfn -=  processed_pfn;
-			break;
-		}
+		wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
 
 	}
 
@@ -978,6 +975,14 @@
 				dm->num_pages_ballooned +
 				compute_balloon_floor();
 
+	/*
+	 * If our transaction ID is no longer current, just don't
+	 * send the status. This can happen if we were interrupted
+	 * after we picked our transaction ID.
+	 */
+	if (status.hdr.trans_id != atomic_read(&trans_id))
+		return;
+
 	vmbus_sendpacket(dm->dev->channel, &status,
 				sizeof(struct dm_status),
 				(unsigned long)NULL,
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index a2464bf0..e8e071f 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -690,7 +690,7 @@
 	if (ret)
 		pr_err("Unable to register child device\n");
 	else
-		pr_info("child device %s registered\n",
+		pr_debug("child device %s registered\n",
 			dev_name(&child_device_obj->device));
 
 	return ret;
@@ -702,14 +702,14 @@
  */
 void vmbus_device_unregister(struct hv_device *device_obj)
 {
+	pr_debug("child device %s unregistered\n",
+		dev_name(&device_obj->device));
+
 	/*
 	 * Kick off the process of unregistering the device.
 	 * This will call vmbus_remove() and eventually vmbus_device_release()
 	 */
 	device_unregister(&device_obj->device);
-
-	pr_info("child device %s unregistered\n",
-		dev_name(&device_obj->device));
 }
 
 
diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c
index 0f34bca..6099f50 100644
--- a/drivers/hwmon/adt7470.c
+++ b/drivers/hwmon/adt7470.c
@@ -215,7 +215,7 @@
 					  u16 value)
 {
 	return i2c_smbus_write_byte_data(client, reg, value & 0xFF)
-	       && i2c_smbus_write_byte_data(client, reg + 1, value >> 8);
+	       || i2c_smbus_write_byte_data(client, reg + 1, value >> 8);
 }
 
 static void adt7470_init_client(struct i2c_client *client)
diff --git a/drivers/hwmon/max6697.c b/drivers/hwmon/max6697.c
index 328fb03..a41b5f3 100644
--- a/drivers/hwmon/max6697.c
+++ b/drivers/hwmon/max6697.c
@@ -605,12 +605,12 @@
 		if (ret < 0)
 			return ret;
 		ret = i2c_smbus_write_byte_data(client, MAX6581_REG_IDEALITY,
-						pdata->ideality_mask >> 1);
+						pdata->ideality_value);
 		if (ret < 0)
 			return ret;
 		ret = i2c_smbus_write_byte_data(client,
 						MAX6581_REG_IDEALITY_SELECT,
-						pdata->ideality_value);
+						pdata->ideality_mask >> 1);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/drivers/i2c/busses/i2c-kempld.c b/drivers/i2c/busses/i2c-kempld.c
index ccec916..af8f65f 100644
--- a/drivers/i2c/busses/i2c-kempld.c
+++ b/drivers/i2c/busses/i2c-kempld.c
@@ -246,9 +246,9 @@
 		bus_frequency = KEMPLD_I2C_FREQ_MAX;
 
 	if (pld->info.spec_major == 1)
-		prescale = pld->pld_clock / bus_frequency * 5 - 1000;
+		prescale = pld->pld_clock / (bus_frequency * 5) - 1000;
 	else
-		prescale = pld->pld_clock / bus_frequency * 4 - 3000;
+		prescale = pld->pld_clock / (bus_frequency * 4) - 3000;
 
 	if (prescale < 0)
 		prescale = 0;
diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c
index df8ff5a..e2e9a0d 100644
--- a/drivers/i2c/busses/i2c-mxs.c
+++ b/drivers/i2c/busses/i2c-mxs.c
@@ -493,7 +493,7 @@
 	 * based on this empirical measurement and a lot of previous frobbing.
 	 */
 	i2c->cmd_err = 0;
-	if (msg->len < 8) {
+	if (0) {	/* disable PIO mode until a proper fix is made */
 		ret = mxs_i2c_pio_setup_xfer(adap, msg, flags);
 		if (ret)
 			mxs_i2c_reset(i2c);
diff --git a/drivers/iio/adc/ti_am335x_adc.c b/drivers/iio/adc/ti_am335x_adc.c
index 0ad208a..3ceac3e 100644
--- a/drivers/iio/adc/ti_am335x_adc.c
+++ b/drivers/iio/adc/ti_am335x_adc.c
@@ -60,7 +60,6 @@
 {
 	unsigned int stepconfig;
 	int i, steps;
-	u32 step_en;
 
 	/*
 	 * There are 16 configurable steps and 8 analog input
@@ -86,8 +85,7 @@
 		adc_dev->channel_step[i] = steps;
 		steps++;
 	}
-	step_en = get_adc_step_mask(adc_dev);
-	am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en);
+
 }
 
 static const char * const chan_name_ain[] = {
@@ -142,10 +140,22 @@
 		int *val, int *val2, long mask)
 {
 	struct tiadc_device *adc_dev = iio_priv(indio_dev);
-	int i;
-	unsigned int fifo1count, read;
+	int i, map_val;
+	unsigned int fifo1count, read, stepid;
 	u32 step = UINT_MAX;
 	bool found = false;
+	u32 step_en;
+	unsigned long timeout = jiffies + usecs_to_jiffies
+				(IDLE_TIMEOUT * adc_dev->channels);
+	step_en = get_adc_step_mask(adc_dev);
+	am335x_tsc_se_set(adc_dev->mfd_tscadc, step_en);
+
+	/* Wait for ADC sequencer to complete sampling */
+	while (tiadc_readl(adc_dev, REG_ADCFSM) & SEQ_STATUS) {
+		if (time_after(jiffies, timeout))
+			return -EAGAIN;
+		}
+	map_val = chan->channel + TOTAL_CHANNELS;
 
 	/*
 	 * When the sub-system is first enabled,
@@ -170,12 +180,16 @@
 	fifo1count = tiadc_readl(adc_dev, REG_FIFO1CNT);
 	for (i = 0; i < fifo1count; i++) {
 		read = tiadc_readl(adc_dev, REG_FIFO1);
-		if (read >> 16 == step) {
-			*val = read & 0xfff;
+		stepid = read & FIFOREAD_CHNLID_MASK;
+		stepid = stepid >> 0x10;
+
+		if (stepid == map_val) {
+			read = read & FIFOREAD_DATA_MASK;
 			found = true;
+			*val = read;
 		}
 	}
-	am335x_tsc_se_update(adc_dev->mfd_tscadc);
+
 	if (found == false)
 		return -EBUSY;
 	return IIO_VAL_INT;
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index ea8a414..0dd9bb8 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -127,12 +127,17 @@
 void iio_trigger_poll(struct iio_trigger *trig, s64 time)
 {
 	int i;
-	if (!trig->use_count)
-		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++)
-			if (trig->subirqs[i].enabled) {
-				trig->use_count++;
+
+	if (!atomic_read(&trig->use_count)) {
+		atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER);
+
+		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) {
+			if (trig->subirqs[i].enabled)
 				generic_handle_irq(trig->subirq_base + i);
-			}
+			else
+				iio_trigger_notify_done(trig);
+		}
+	}
 }
 EXPORT_SYMBOL(iio_trigger_poll);
 
@@ -146,19 +151,24 @@
 void iio_trigger_poll_chained(struct iio_trigger *trig, s64 time)
 {
 	int i;
-	if (!trig->use_count)
-		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++)
-			if (trig->subirqs[i].enabled) {
-				trig->use_count++;
+
+	if (!atomic_read(&trig->use_count)) {
+		atomic_set(&trig->use_count, CONFIG_IIO_CONSUMERS_PER_TRIGGER);
+
+		for (i = 0; i < CONFIG_IIO_CONSUMERS_PER_TRIGGER; i++) {
+			if (trig->subirqs[i].enabled)
 				handle_nested_irq(trig->subirq_base + i);
-			}
+			else
+				iio_trigger_notify_done(trig);
+		}
+	}
 }
 EXPORT_SYMBOL(iio_trigger_poll_chained);
 
 void iio_trigger_notify_done(struct iio_trigger *trig)
 {
-	trig->use_count--;
-	if (trig->use_count == 0 && trig->ops && trig->ops->try_reenable)
+	if (atomic_dec_and_test(&trig->use_count) && trig->ops &&
+		trig->ops->try_reenable)
 		if (trig->ops->try_reenable(trig))
 			/* Missed an interrupt so launch new poll now */
 			iio_trigger_poll(trig, 0);
diff --git a/drivers/iio/light/adjd_s311.c b/drivers/iio/light/adjd_s311.c
index 5f4749e..c1cd569 100644
--- a/drivers/iio/light/adjd_s311.c
+++ b/drivers/iio/light/adjd_s311.c
@@ -232,7 +232,8 @@
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-		ret = adjd_s311_read_data(indio_dev, chan->address, val);
+		ret = adjd_s311_read_data(indio_dev,
+			ADJD_S311_DATA_REG(chan->address), val);
 		if (ret < 0)
 			return ret;
 		return IIO_VAL_INT;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f1c279f..7c0f953 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -423,7 +423,7 @@
 	struct sockaddr_ib *addr;
 	union ib_gid gid, sgid, *dgid;
 	u16 pkey, index;
-	u8 port, p;
+	u8 p;
 	int i;
 
 	cma_dev = NULL;
@@ -443,7 +443,7 @@
 				if (!memcmp(&gid, dgid, sizeof(gid))) {
 					cma_dev = cur_dev;
 					sgid = gid;
-					port = p;
+					id_priv->id.port_num = p;
 					goto found;
 				}
 
@@ -451,7 +451,7 @@
 						 dgid->global.subnet_prefix)) {
 					cma_dev = cur_dev;
 					sgid = gid;
-					port = p;
+					id_priv->id.port_num = p;
 				}
 			}
 		}
@@ -462,7 +462,6 @@
 
 found:
 	cma_attach_to_dev(id_priv, cma_dev);
-	id_priv->id.port_num = port;
 	addr = (struct sockaddr_ib *) cma_src_addr(id_priv);
 	memcpy(&addr->sib_addr, &sgid, sizeof sgid);
 	cma_translate_ib(addr, &id_priv->id.route.addr.dev_addr);
@@ -880,7 +879,8 @@
 {
 	struct cma_hdr *hdr;
 
-	if (listen_id->route.addr.src_addr.ss_family == AF_IB) {
+	if ((listen_id->route.addr.src_addr.ss_family == AF_IB) &&
+	    (ib_event->event == IB_CM_REQ_RECEIVED)) {
 		cma_save_ib_info(id, listen_id, ib_event->param.req_rcvd.primary_path);
 		return 0;
 	}
@@ -2677,29 +2677,32 @@
 {
 	struct ib_cm_sidr_req_param req;
 	struct ib_cm_id	*id;
+	void *private_data;
 	int offset, ret;
 
+	memset(&req, 0, sizeof req);
 	offset = cma_user_data_offset(id_priv);
 	req.private_data_len = offset + conn_param->private_data_len;
 	if (req.private_data_len < conn_param->private_data_len)
 		return -EINVAL;
 
 	if (req.private_data_len) {
-		req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
-		if (!req.private_data)
+		private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
+		if (!private_data)
 			return -ENOMEM;
 	} else {
-		req.private_data = NULL;
+		private_data = NULL;
 	}
 
 	if (conn_param->private_data && conn_param->private_data_len)
-		memcpy((void *) req.private_data + offset,
-		       conn_param->private_data, conn_param->private_data_len);
+		memcpy(private_data + offset, conn_param->private_data,
+		       conn_param->private_data_len);
 
-	if (req.private_data) {
-		ret = cma_format_hdr((void *) req.private_data, id_priv);
+	if (private_data) {
+		ret = cma_format_hdr(private_data, id_priv);
 		if (ret)
 			goto out;
+		req.private_data = private_data;
 	}
 
 	id = ib_create_cm_id(id_priv->id.device, cma_sidr_rep_handler,
@@ -2721,7 +2724,7 @@
 		id_priv->cm_id.ib = NULL;
 	}
 out:
-	kfree(req.private_data);
+	kfree(private_data);
 	return ret;
 }
 
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index dc3fd1e..4c837e6 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -2663,6 +2663,7 @@
 	int ret, i;
 	struct ib_qp_attr *attr;
 	struct ib_qp *qp;
+	u16 pkey_index;
 
 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
 	if (!attr) {
@@ -2670,6 +2671,11 @@
 		return -ENOMEM;
 	}
 
+	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
+			   IB_DEFAULT_PKEY_FULL, &pkey_index);
+	if (ret)
+		pkey_index = 0;
+
 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
 		qp = port_priv->qp_info[i].qp;
 		if (!qp)
@@ -2680,7 +2686,7 @@
 		 * one is needed for the Reset to Init transition
 		 */
 		attr->qp_state = IB_QPS_INIT;
-		attr->pkey_index = 0;
+		attr->pkey_index = pkey_index;
 		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
 					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index e87f220..d228383 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -226,6 +226,7 @@
 			mm->len = PAGE_ALIGN(((1UL << uresp.size_log2) + 1) *
 					     sizeof(struct t3_cqe));
 			uresp.memsize = mm->len;
+			uresp.reserved = 0;
 			resplen = sizeof uresp;
 		}
 		if (ib_copy_to_udata(udata, &uresp, resplen)) {
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 2320404..a4975e1 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1657,6 +1657,8 @@
 		if (mm5) {
 			uresp.ma_sync_key = ucontext->key;
 			ucontext->key += PAGE_SIZE;
+		} else {
+			uresp.ma_sync_key =  0;
 		}
 		uresp.sq_key = ucontext->key;
 		ucontext->key += PAGE_SIZE;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 4d599ce..f2a3f48 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1511,8 +1511,14 @@
 
 	memset(&attr, 0, sizeof attr);
 	attr.qp_state = IB_QPS_INIT;
-	attr.pkey_index =
-		to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
+	ret = 0;
+	if (create_tun)
+		ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave,
+					      ctx->port, IB_DEFAULT_PKEY_FULL,
+					      &attr.pkey_index);
+	if (ret || !create_tun)
+		attr.pkey_index =
+			to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
 	attr.qkey = IB_QP1_QKEY;
 	attr.port_num = ctx->port;
 	ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 8000fff..3f831de 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -619,7 +619,8 @@
 
 	resp.tot_uuars = req.total_num_uuars;
 	resp.num_ports = dev->mdev.caps.num_ports;
-	err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	err = ib_copy_to_udata(udata, &resp,
+			       sizeof(resp) - sizeof(resp.reserved));
 	if (err)
 		goto out_uars;
 
@@ -1426,7 +1427,8 @@
 	if (err)
 		goto err_eqs;
 
-	if (ib_register_device(&dev->ib_dev, NULL))
+	err = ib_register_device(&dev->ib_dev, NULL);
+	if (err)
 		goto err_rsrc;
 
 	err = create_umr_res(dev);
@@ -1434,8 +1436,9 @@
 		goto err_dev;
 
 	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
-		if (device_create_file(&dev->ib_dev.dev,
-				       mlx5_class_attributes[i]))
+		err = device_create_file(&dev->ib_dev.dev,
+					 mlx5_class_attributes[i]);
+		if (err)
 			goto err_umrc;
 	}
 
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 16ac54c..045f8cdb 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -199,7 +199,7 @@
 
 static int sq_overhead(enum ib_qp_type qp_type)
 {
-	int size;
+	int size = 0;
 
 	switch (qp_type) {
 	case IB_QPT_XRC_INI:
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 418004c..9020024 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -3570,10 +3570,10 @@
 	tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
 	iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
 	nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p,"
-			" Tcp state = %d, iWARP state = %d\n",
+			" Tcp state = %s, iWARP state = %s\n",
 			async_event_id,
 			le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
-			tcp_state, iwarp_state);
+			nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
 
 	aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
 	if (aeq_info & NES_AEQE_QP) {
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 8f67fe2e..5b53ca5 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1384,6 +1384,7 @@
 
 			if (ibpd->uobject) {
 				uresp.mmap_sq_db_index = nesqp->mmap_sq_db_index;
+				uresp.mmap_rq_db_index = 0;
 				uresp.actual_sq_size = sq_size;
 				uresp.actual_rq_size = rq_size;
 				uresp.qp_id = nesqp->hwqp.qp_id;
@@ -1767,7 +1768,7 @@
 		resp.cq_id = nescq->hw_cq.cq_number;
 		resp.cq_size = nescq->hw_cq.cq_size;
 		resp.mmap_db_index = 0;
-		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+		if (ib_copy_to_udata(udata, &resp, sizeof resp - sizeof resp.reserved)) {
 			nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
 			kfree(nescq);
 			return ERR_PTR(-EFAULT);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index a877a8e..f4c587c 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -29,7 +29,6 @@
 #include <net/netevent.h>
 
 #include <rdma/ib_addr.h>
-#include <rdma/ib_cache.h>
 
 #include "ocrdma.h"
 #include "ocrdma_verbs.h"
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index dcfbab1..f36630e 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -242,6 +242,7 @@
 	memset(ctx->ah_tbl.va, 0, map_len);
 	ctx->ah_tbl.len = map_len;
 
+	memset(&resp, 0, sizeof(resp));
 	resp.ah_tbl_len = ctx->ah_tbl.len;
 	resp.ah_tbl_page = ctx->ah_tbl.pa;
 
@@ -253,7 +254,6 @@
 	resp.wqe_size = dev->attr.wqe_size;
 	resp.rqe_size = dev->attr.rqe_size;
 	resp.dpp_wqe_size = dev->attr.wqe_size;
-	resp.rsvd = 0;
 
 	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
 	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
@@ -338,6 +338,7 @@
 	struct ocrdma_alloc_pd_uresp rsp;
 	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
 
+	memset(&rsp, 0, sizeof(rsp));
 	rsp.id = pd->id;
 	rsp.dpp_enabled = pd->dpp_enabled;
 	db_page_addr = pd->dev->nic_info.unmapped_db +
@@ -692,6 +693,7 @@
 	struct ocrdma_ucontext *uctx;
 	struct ocrdma_create_cq_uresp uresp;
 
+	memset(&uresp, 0, sizeof(uresp));
 	uresp.cq_id = cq->id;
 	uresp.page_size = cq->len;
 	uresp.num_pages = 1;
@@ -1460,6 +1462,7 @@
 	int status;
 	struct ocrdma_create_srq_uresp uresp;
 
+	memset(&uresp, 0, sizeof(uresp));
 	uresp.rq_dbid = srq->rq.dbid;
 	uresp.num_rq_pages = 1;
 	uresp.rq_page_addr[0] = srq->rq.pa;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 21e8b09..016e742 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1596,6 +1596,8 @@
 	struct qib_devdata *dd = ppd->dd;
 
 	errs &= QIB_E_P_SDMAERRS;
+	err_decode(ppd->cpspec->sdmamsgbuf, sizeof(ppd->cpspec->sdmamsgbuf),
+		   errs, qib_7322p_error_msgs);
 
 	if (errs & QIB_E_P_SDMAUNEXPDATA)
 		qib_dev_err(dd, "IB%u:%u SDmaUnexpData\n", dd->unit,
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 32162d3..9b5322d 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -717,7 +717,7 @@
 	struct qib_sdma_txreq *txp, *txpnext;
 	__le64 *descqp;
 	u64 desc[2];
-	dma_addr_t addr;
+	u64 addr;
 	u16 gen, dwlen, dwoffset;
 	u16 head, tail, cnt;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 2cfa76f..196b1d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -932,12 +932,47 @@
 	return 0;
 }
 
+/*
+ * Takes whatever value which is in pkey index 0 and updates priv->pkey
+ * returns 0 if the pkey value was changed.
+ */
+static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
+{
+	int result;
+	u16 prev_pkey;
+
+	prev_pkey = priv->pkey;
+	result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
+	if (result) {
+		ipoib_warn(priv, "ib_query_pkey port %d failed (ret = %d)\n",
+			   priv->port, result);
+		return result;
+	}
+
+	priv->pkey |= 0x8000;
+
+	if (prev_pkey != priv->pkey) {
+		ipoib_dbg(priv, "pkey changed from 0x%x to 0x%x\n",
+			  prev_pkey, priv->pkey);
+		/*
+		 * Update the pkey in the broadcast address, while making sure to set
+		 * the full membership bit, so that we join the right broadcast group.
+		 */
+		priv->dev->broadcast[8] = priv->pkey >> 8;
+		priv->dev->broadcast[9] = priv->pkey & 0xff;
+		return 0;
+	}
+
+	return 1;
+}
+
 static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 				enum ipoib_flush_level level)
 {
 	struct ipoib_dev_priv *cpriv;
 	struct net_device *dev = priv->dev;
 	u16 new_index;
+	int result;
 
 	mutex_lock(&priv->vlan_mutex);
 
@@ -951,6 +986,10 @@
 	mutex_unlock(&priv->vlan_mutex);
 
 	if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) {
+		/* for non-child devices must check/update the pkey value here */
+		if (level == IPOIB_FLUSH_HEAVY &&
+		    !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
+			update_parent_pkey(priv);
 		ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
 		return;
 	}
@@ -961,21 +1000,32 @@
 	}
 
 	if (level == IPOIB_FLUSH_HEAVY) {
-		if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
-			clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
-			ipoib_ib_dev_down(dev, 0);
-			ipoib_ib_dev_stop(dev, 0);
-			if (ipoib_pkey_dev_delay_open(dev))
+		/* child devices chase their origin pkey value, while non-child
+		 * (parent) devices should always takes what present in pkey index 0
+		 */
+		if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
+			if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) {
+				clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+				ipoib_ib_dev_down(dev, 0);
+				ipoib_ib_dev_stop(dev, 0);
+				if (ipoib_pkey_dev_delay_open(dev))
+					return;
+			}
+			/* restart QP only if P_Key index is changed */
+			if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
+			    new_index == priv->pkey_index) {
+				ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
 				return;
+			}
+			priv->pkey_index = new_index;
+		} else {
+			result = update_parent_pkey(priv);
+			/* restart QP only if P_Key value changed */
+			if (result) {
+				ipoib_dbg(priv, "Not flushing - P_Key value not changed.\n");
+				return;
+			}
 		}
-
-		/* restart QP only if P_Key index is changed */
-		if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
-		    new_index == priv->pkey_index) {
-			ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
-			return;
-		}
-		priv->pkey_index = new_index;
 	}
 
 	if (level == IPOIB_FLUSH_LIGHT) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index b6e049a..c6f71a8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1461,7 +1461,7 @@
 	if (sscanf(buf, "%i", &pkey) != 1)
 		return -EINVAL;
 
-	if (pkey < 0 || pkey > 0xffff)
+	if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000)
 		return -EINVAL;
 
 	/*
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index 7468593..f81abe1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -119,6 +119,15 @@
 	} else
 		child_pkey  = nla_get_u16(data[IFLA_IPOIB_PKEY]);
 
+	if (child_pkey == 0 || child_pkey == 0x8000)
+		return -EINVAL;
+
+	/*
+	 * Set the full membership bit, so that we join the right
+	 * broadcast group, etc.
+	 */
+	child_pkey |= 0x8000;
+
 	err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
 
 	if (!err && data)
diff --git a/drivers/macintosh/windfarm_rm31.c b/drivers/macintosh/windfarm_rm31.c
index 0b9a79b..82fc86a 100644
--- a/drivers/macintosh/windfarm_rm31.c
+++ b/drivers/macintosh/windfarm_rm31.c
@@ -439,15 +439,15 @@
 
 /* Slots fan */
 static const struct wf_pid_param slots_param = {
-	.interval	= 5,
-	.history_len	= 2,
-	.gd		= 30 << 20,
-	.gp		= 5 << 20,
-	.gr		= 0,
-	.itarget	= 40 << 16,
-	.additive	= 1,
-	.min		= 300,
-	.max		= 4000,
+	.interval	= 1,
+	.history_len	= 20,
+	.gd		= 0,
+	.gp		= 0,
+	.gr		= 0x00100000,
+	.itarget	= 3200000,
+	.additive	= 0,
+	.min		= 20,
+	.max		= 100,
 };
 
 static void slots_fan_tick(void)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 048f294..e45f557 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -63,7 +63,10 @@
 #include "bcache.h"
 #include "btree.h"
 
+#include <linux/freezer.h>
+#include <linux/kthread.h>
 #include <linux/random.h>
+#include <trace/events/bcache.h>
 
 #define MAX_IN_FLIGHT_DISCARDS		8U
 
@@ -151,7 +154,7 @@
 	mutex_unlock(&ca->set->bucket_lock);
 
 	closure_wake_up(&ca->set->bucket_wait);
-	wake_up(&ca->set->alloc_wait);
+	wake_up_process(ca->alloc_thread);
 
 	closure_put(&ca->set->cl);
 }
@@ -350,38 +353,30 @@
 		break;
 	}
 
-	pr_debug("free %zu/%zu free_inc %zu/%zu unused %zu/%zu",
-		 fifo_used(&ca->free), ca->free.size,
-		 fifo_used(&ca->free_inc), ca->free_inc.size,
-		 fifo_used(&ca->unused), ca->unused.size);
+	trace_bcache_alloc_invalidate(ca);
 }
 
 #define allocator_wait(ca, cond)					\
 do {									\
-	DEFINE_WAIT(__wait);						\
-									\
 	while (1) {							\
-		prepare_to_wait(&ca->set->alloc_wait,			\
-				&__wait, TASK_INTERRUPTIBLE);		\
+		set_current_state(TASK_INTERRUPTIBLE);			\
 		if (cond)						\
 			break;						\
 									\
 		mutex_unlock(&(ca)->set->bucket_lock);			\
-		if (test_bit(CACHE_SET_STOPPING_2, &ca->set->flags)) {	\
-			finish_wait(&ca->set->alloc_wait, &__wait);	\
-			closure_return(cl);				\
-		}							\
+		if (kthread_should_stop())				\
+			return 0;					\
 									\
+		try_to_freeze();					\
 		schedule();						\
 		mutex_lock(&(ca)->set->bucket_lock);			\
 	}								\
-									\
-	finish_wait(&ca->set->alloc_wait, &__wait);			\
+	__set_current_state(TASK_RUNNING);				\
 } while (0)
 
-void bch_allocator_thread(struct closure *cl)
+static int bch_allocator_thread(void *arg)
 {
-	struct cache *ca = container_of(cl, struct cache, alloc);
+	struct cache *ca = arg;
 
 	mutex_lock(&ca->set->bucket_lock);
 
@@ -442,7 +437,7 @@
 {
 	long r = -1;
 again:
-	wake_up(&ca->set->alloc_wait);
+	wake_up_process(ca->alloc_thread);
 
 	if (fifo_used(&ca->free) > ca->watermark[watermark] &&
 	    fifo_pop(&ca->free, r)) {
@@ -476,9 +471,7 @@
 		return r;
 	}
 
-	pr_debug("alloc failure: blocked %i free %zu free_inc %zu unused %zu",
-		 atomic_read(&ca->set->prio_blocked), fifo_used(&ca->free),
-		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
+	trace_bcache_alloc_fail(ca);
 
 	if (cl) {
 		closure_wait(&ca->set->bucket_wait, cl);
@@ -552,6 +545,17 @@
 
 /* Init */
 
+int bch_cache_allocator_start(struct cache *ca)
+{
+	struct task_struct *k = kthread_run(bch_allocator_thread,
+					    ca, "bcache_allocator");
+	if (IS_ERR(k))
+		return PTR_ERR(k);
+
+	ca->alloc_thread = k;
+	return 0;
+}
+
 void bch_cache_allocator_exit(struct cache *ca)
 {
 	struct discard *d;
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d3e15b4..b39f6f0 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -178,7 +178,6 @@
 #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
 
 #include <linux/bio.h>
-#include <linux/blktrace_api.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mutex.h>
@@ -388,8 +387,6 @@
 typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *);
 
 struct keybuf {
-	keybuf_pred_fn		*key_predicate;
-
 	struct bkey		last_scanned;
 	spinlock_t		lock;
 
@@ -437,9 +434,12 @@
 
 	/* If nonzero, we're detaching/unregistering from cache set */
 	atomic_t		detaching;
+	int			flush_done;
 
-	atomic_long_t		sectors_dirty;
-	unsigned long		sectors_dirty_gc;
+	uint64_t		nr_stripes;
+	unsigned		stripe_size_bits;
+	atomic_t		*stripe_sectors_dirty;
+
 	unsigned long		sectors_dirty_last;
 	long			sectors_dirty_derivative;
 
@@ -531,6 +531,7 @@
 	unsigned		sequential_merge:1;
 	unsigned		verify:1;
 
+	unsigned		partial_stripes_expensive:1;
 	unsigned		writeback_metadata:1;
 	unsigned		writeback_running:1;
 	unsigned char		writeback_percent;
@@ -565,8 +566,7 @@
 
 	unsigned		watermark[WATERMARK_MAX];
 
-	struct closure		alloc;
-	struct workqueue_struct	*alloc_workqueue;
+	struct task_struct	*alloc_thread;
 
 	struct closure		prio;
 	struct prio_set		*disk_buckets;
@@ -664,13 +664,9 @@
  * CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
  * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
  * flushing dirty data).
- *
- * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down
- * the allocation thread.
  */
 #define CACHE_SET_UNREGISTERING		0
 #define	CACHE_SET_STOPPING		1
-#define	CACHE_SET_STOPPING_2		2
 
 struct cache_set {
 	struct closure		cl;
@@ -703,9 +699,6 @@
 	/* For the btree cache */
 	struct shrinker		shrink;
 
-	/* For the allocator itself */
-	wait_queue_head_t	alloc_wait;
-
 	/* For the btree cache and anything allocation related */
 	struct mutex		bucket_lock;
 
@@ -823,10 +816,9 @@
 
 	/*
 	 * A btree node on disk could have too many bsets for an iterator to fit
-	 * on the stack - this is a single element mempool for btree_read_work()
+	 * on the stack - have to dynamically allocate them
 	 */
-	struct mutex		fill_lock;
-	struct btree_iter	*fill_iter;
+	mempool_t		*fill_iter;
 
 	/*
 	 * btree_sort() is a merge sort and requires temporary space - single
@@ -834,6 +826,7 @@
 	 */
 	struct mutex		sort_lock;
 	struct bset		*sort;
+	unsigned		sort_crit_factor;
 
 	/* List of buckets we're currently writing data to */
 	struct list_head	data_buckets;
@@ -906,8 +899,6 @@
 	return local_clock() >> 10;
 }
 
-#define MAX_BSETS		4U
-
 #define BTREE_PRIO		USHRT_MAX
 #define INITIAL_PRIO		32768
 
@@ -1112,23 +1103,6 @@
 		atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin);
 }
 
-/* Blktrace macros */
-
-#define blktrace_msg(c, fmt, ...)					\
-do {									\
-	struct request_queue *q = bdev_get_queue(c->bdev);		\
-	if (q)								\
-		blk_add_trace_msg(q, fmt, ##__VA_ARGS__);		\
-} while (0)
-
-#define blktrace_msg_all(s, fmt, ...)					\
-do {									\
-	struct cache *_c;						\
-	unsigned i;							\
-	for_each_cache(_c, (s), i)					\
-		blktrace_msg(_c, fmt, ##__VA_ARGS__);			\
-} while (0)
-
 static inline void cached_dev_put(struct cached_dev *dc)
 {
 	if (atomic_dec_and_test(&dc->count))
@@ -1173,10 +1147,16 @@
 	static struct kobj_attribute ksysfs_##n =			\
 		__ATTR(n, S_IWUSR|S_IRUSR, show, store)
 
-/* Forward declarations */
+static inline void wake_up_allocators(struct cache_set *c)
+{
+	struct cache *ca;
+	unsigned i;
 
-void bch_writeback_queue(struct cached_dev *);
-void bch_writeback_add(struct cached_dev *, unsigned);
+	for_each_cache(ca, c, i)
+		wake_up_process(ca->alloc_thread);
+}
+
+/* Forward declarations */
 
 void bch_count_io_errors(struct cache *, int, const char *);
 void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
@@ -1193,7 +1173,6 @@
 uint8_t bch_inc_gen(struct cache *, struct bucket *);
 void bch_rescale_priorities(struct cache_set *, int);
 bool bch_bucket_add_unused(struct cache *, struct bucket *);
-void bch_allocator_thread(struct closure *);
 
 long bch_bucket_alloc(struct cache *, unsigned, struct closure *);
 void bch_bucket_free(struct cache_set *, struct bkey *);
@@ -1241,9 +1220,9 @@
 struct cache_set *bch_cache_set_alloc(struct cache_sb *);
 void bch_btree_cache_free(struct cache_set *);
 int bch_btree_cache_alloc(struct cache_set *);
-void bch_cached_dev_writeback_init(struct cached_dev *);
 void bch_moving_init_cache_set(struct cache_set *);
 
+int bch_cache_allocator_start(struct cache *ca);
 void bch_cache_allocator_exit(struct cache *ca);
 int bch_cache_allocator_init(struct cache *ca);
 
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 1d27d3a..8010eed 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -78,6 +78,7 @@
 bool __bch_ptr_invalid(struct cache_set *c, int level, const struct bkey *k)
 {
 	unsigned i;
+	char buf[80];
 
 	if (level && (!KEY_PTRS(k) || !KEY_SIZE(k) || KEY_DIRTY(k)))
 		goto bad;
@@ -102,7 +103,8 @@
 
 	return false;
 bad:
-	cache_bug(c, "spotted bad key %s: %s", pkey(k), bch_ptr_status(c, k));
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	cache_bug(c, "spotted bad key %s: %s", buf, bch_ptr_status(c, k));
 	return true;
 }
 
@@ -162,10 +164,16 @@
 #ifdef CONFIG_BCACHE_EDEBUG
 bug:
 	mutex_unlock(&b->c->bucket_lock);
-	btree_bug(b,
+
+	{
+		char buf[80];
+
+		bch_bkey_to_text(buf, sizeof(buf), k);
+		btree_bug(b,
 "inconsistent pointer %s: bucket %zu pin %i prio %i gen %i last_gc %i mark %llu gc_gen %i",
-		  pkey(k), PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
-		  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+			  buf, PTR_BUCKET_NR(b->c, k, i), atomic_read(&g->pin),
+			  g->prio, g->gen, g->last_gc, GC_MARK(g), g->gc_gen);
+	}
 	return true;
 #endif
 }
@@ -1084,33 +1092,39 @@
 	new->sets->size = 0;
 }
 
+#define SORT_CRIT	(4096 / sizeof(uint64_t))
+
 void bch_btree_sort_lazy(struct btree *b)
 {
-	if (b->nsets) {
-		unsigned i, j, keys = 0, total;
+	unsigned crit = SORT_CRIT;
+	int i;
 
-		for (i = 0; i <= b->nsets; i++)
-			keys += b->sets[i].data->keys;
+	/* Don't sort if nothing to do */
+	if (!b->nsets)
+		goto out;
 
-		total = keys;
+	/* If not a leaf node, always sort */
+	if (b->level) {
+		bch_btree_sort(b);
+		return;
+	}
 
-		for (j = 0; j < b->nsets; j++) {
-			if (keys * 2 < total ||
-			    keys < 1000) {
-				bch_btree_sort_partial(b, j);
-				return;
-			}
+	for (i = b->nsets - 1; i >= 0; --i) {
+		crit *= b->c->sort_crit_factor;
 
-			keys -= b->sets[j].data->keys;
-		}
-
-		/* Must sort if b->nsets == 3 or we'll overflow */
-		if (b->nsets >= (MAX_BSETS - 1) - b->level) {
-			bch_btree_sort(b);
+		if (b->sets[i].data->keys < crit) {
+			bch_btree_sort_partial(b, i);
 			return;
 		}
 	}
 
+	/* Sort if we'd overflow */
+	if (b->nsets + 1 == MAX_BSETS) {
+		bch_btree_sort(b);
+		return;
+	}
+
+out:
 	bset_build_written_tree(b);
 }
 
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 57a9cff..ae115a2 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -1,6 +1,8 @@
 #ifndef _BCACHE_BSET_H
 #define _BCACHE_BSET_H
 
+#include <linux/slab.h>
+
 /*
  * BKEYS:
  *
@@ -142,6 +144,8 @@
 
 /* Btree key comparison/iteration */
 
+#define MAX_BSETS		4U
+
 struct btree_iter {
 	size_t size, used;
 	struct btree_iter_set {
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 7a5658f..ee37288 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -24,6 +24,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/slab.h>
 #include <linux/bitops.h>
@@ -134,44 +135,17 @@
 	return crc ^ 0xffffffffffffffffULL;
 }
 
-static void btree_bio_endio(struct bio *bio, int error)
+static void bch_btree_node_read_done(struct btree *b)
 {
-	struct closure *cl = bio->bi_private;
-	struct btree *b = container_of(cl, struct btree, io.cl);
-
-	if (error)
-		set_btree_node_io_error(b);
-
-	bch_bbio_count_io_errors(b->c, bio, error, (bio->bi_rw & WRITE)
-				 ? "writing btree" : "reading btree");
-	closure_put(cl);
-}
-
-static void btree_bio_init(struct btree *b)
-{
-	BUG_ON(b->bio);
-	b->bio = bch_bbio_alloc(b->c);
-
-	b->bio->bi_end_io	= btree_bio_endio;
-	b->bio->bi_private	= &b->io.cl;
-}
-
-void bch_btree_read_done(struct closure *cl)
-{
-	struct btree *b = container_of(cl, struct btree, io.cl);
-	struct bset *i = b->sets[0].data;
-	struct btree_iter *iter = b->c->fill_iter;
 	const char *err = "bad btree header";
-	BUG_ON(b->nsets || b->written);
+	struct bset *i = b->sets[0].data;
+	struct btree_iter *iter;
 
-	bch_bbio_free(b->bio, b->c);
-	b->bio = NULL;
-
-	mutex_lock(&b->c->fill_lock);
+	iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT);
+	iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
 	iter->used = 0;
 
-	if (btree_node_io_error(b) ||
-	    !i->seq)
+	if (!i->seq)
 		goto err;
 
 	for (;
@@ -228,17 +202,8 @@
 	if (b->written < btree_blocks(b))
 		bch_bset_init_next(b);
 out:
-
-	mutex_unlock(&b->c->fill_lock);
-
-	spin_lock(&b->c->btree_read_time_lock);
-	bch_time_stats_update(&b->c->btree_read_time, b->io_start_time);
-	spin_unlock(&b->c->btree_read_time_lock);
-
-	smp_wmb(); /* read_done is our write lock */
-	set_btree_node_read_done(b);
-
-	closure_return(cl);
+	mempool_free(iter, b->c->fill_iter);
+	return;
 err:
 	set_btree_node_io_error(b);
 	bch_cache_set_error(b->c, "%s at bucket %zu, block %zu, %u keys",
@@ -247,48 +212,69 @@
 	goto out;
 }
 
-void bch_btree_read(struct btree *b)
+static void btree_node_read_endio(struct bio *bio, int error)
 {
-	BUG_ON(b->nsets || b->written);
+	struct closure *cl = bio->bi_private;
+	closure_put(cl);
+}
 
-	if (!closure_trylock(&b->io.cl, &b->c->cl))
-		BUG();
+void bch_btree_node_read(struct btree *b)
+{
+	uint64_t start_time = local_clock();
+	struct closure cl;
+	struct bio *bio;
 
-	b->io_start_time = local_clock();
+	trace_bcache_btree_read(b);
 
-	btree_bio_init(b);
-	b->bio->bi_rw	= REQ_META|READ_SYNC;
-	b->bio->bi_size	= KEY_SIZE(&b->key) << 9;
+	closure_init_stack(&cl);
 
-	bch_bio_map(b->bio, b->sets[0].data);
+	bio = bch_bbio_alloc(b->c);
+	bio->bi_rw	= REQ_META|READ_SYNC;
+	bio->bi_size	= KEY_SIZE(&b->key) << 9;
+	bio->bi_end_io	= btree_node_read_endio;
+	bio->bi_private	= &cl;
 
-	pr_debug("%s", pbtree(b));
-	trace_bcache_btree_read(b->bio);
-	bch_submit_bbio(b->bio, b->c, &b->key, 0);
+	bch_bio_map(bio, b->sets[0].data);
 
-	continue_at(&b->io.cl, bch_btree_read_done, system_wq);
+	bch_submit_bbio(bio, b->c, &b->key, 0);
+	closure_sync(&cl);
+
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		set_btree_node_io_error(b);
+
+	bch_bbio_free(bio, b->c);
+
+	if (btree_node_io_error(b))
+		goto err;
+
+	bch_btree_node_read_done(b);
+
+	spin_lock(&b->c->btree_read_time_lock);
+	bch_time_stats_update(&b->c->btree_read_time, start_time);
+	spin_unlock(&b->c->btree_read_time_lock);
+
+	return;
+err:
+	bch_cache_set_error(b->c, "io error reading bucket %lu",
+			    PTR_BUCKET_NR(b->c, &b->key, 0));
 }
 
 static void btree_complete_write(struct btree *b, struct btree_write *w)
 {
 	if (w->prio_blocked &&
 	    !atomic_sub_return(w->prio_blocked, &b->c->prio_blocked))
-		wake_up(&b->c->alloc_wait);
+		wake_up_allocators(b->c);
 
 	if (w->journal) {
 		atomic_dec_bug(w->journal);
 		__closure_wake_up(&b->c->journal.wait);
 	}
 
-	if (w->owner)
-		closure_put(w->owner);
-
 	w->prio_blocked	= 0;
 	w->journal	= NULL;
-	w->owner	= NULL;
 }
 
-static void __btree_write_done(struct closure *cl)
+static void __btree_node_write_done(struct closure *cl)
 {
 	struct btree *b = container_of(cl, struct btree, io.cl);
 	struct btree_write *w = btree_prev_write(b);
@@ -304,7 +290,7 @@
 	closure_return(cl);
 }
 
-static void btree_write_done(struct closure *cl)
+static void btree_node_write_done(struct closure *cl)
 {
 	struct btree *b = container_of(cl, struct btree, io.cl);
 	struct bio_vec *bv;
@@ -313,10 +299,22 @@
 	__bio_for_each_segment(bv, b->bio, n, 0)
 		__free_page(bv->bv_page);
 
-	__btree_write_done(cl);
+	__btree_node_write_done(cl);
 }
 
-static void do_btree_write(struct btree *b)
+static void btree_node_write_endio(struct bio *bio, int error)
+{
+	struct closure *cl = bio->bi_private;
+	struct btree *b = container_of(cl, struct btree, io.cl);
+
+	if (error)
+		set_btree_node_io_error(b);
+
+	bch_bbio_count_io_errors(b->c, bio, error, "writing btree");
+	closure_put(cl);
+}
+
+static void do_btree_node_write(struct btree *b)
 {
 	struct closure *cl = &b->io.cl;
 	struct bset *i = b->sets[b->nsets].data;
@@ -325,15 +323,34 @@
 	i->version	= BCACHE_BSET_VERSION;
 	i->csum		= btree_csum_set(b, i);
 
-	btree_bio_init(b);
-	b->bio->bi_rw	= REQ_META|WRITE_SYNC;
-	b->bio->bi_size	= set_blocks(i, b->c) * block_bytes(b->c);
+	BUG_ON(b->bio);
+	b->bio = bch_bbio_alloc(b->c);
+
+	b->bio->bi_end_io	= btree_node_write_endio;
+	b->bio->bi_private	= &b->io.cl;
+	b->bio->bi_rw		= REQ_META|WRITE_SYNC|REQ_FUA;
+	b->bio->bi_size		= set_blocks(i, b->c) * block_bytes(b->c);
 	bch_bio_map(b->bio, i);
 
+	/*
+	 * If we're appending to a leaf node, we don't technically need FUA -
+	 * this write just needs to be persisted before the next journal write,
+	 * which will be marked FLUSH|FUA.
+	 *
+	 * Similarly if we're writing a new btree root - the pointer is going to
+	 * be in the next journal entry.
+	 *
+	 * But if we're writing a new btree node (that isn't a root) or
+	 * appending to a non leaf btree node, we need either FUA or a flush
+	 * when we write the parent with the new pointer. FUA is cheaper than a
+	 * flush, and writes appending to leaf nodes aren't blocking anything so
+	 * just make all btree node writes FUA to keep things sane.
+	 */
+
 	bkey_copy(&k.key, &b->key);
 	SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + bset_offset(b, i));
 
-	if (!bch_bio_alloc_pages(b->bio, GFP_NOIO)) {
+	if (!bio_alloc_pages(b->bio, GFP_NOIO)) {
 		int j;
 		struct bio_vec *bv;
 		void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@@ -342,40 +359,41 @@
 			memcpy(page_address(bv->bv_page),
 			       base + j * PAGE_SIZE, PAGE_SIZE);
 
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
-		continue_at(cl, btree_write_done, NULL);
+		continue_at(cl, btree_node_write_done, NULL);
 	} else {
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);
 
-		trace_bcache_btree_write(b->bio);
 		bch_submit_bbio(b->bio, b->c, &k.key, 0);
 
 		closure_sync(cl);
-		__btree_write_done(cl);
+		__btree_node_write_done(cl);
 	}
 }
 
-static void __btree_write(struct btree *b)
+void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
 	struct bset *i = b->sets[b->nsets].data;
 
-	BUG_ON(current->bio_list);
+	trace_bcache_btree_write(b);
 
-	closure_lock(&b->io, &b->c->cl);
+	BUG_ON(current->bio_list);
+	BUG_ON(b->written >= btree_blocks(b));
+	BUG_ON(b->written && !i->keys);
+	BUG_ON(b->sets->data->seq != i->seq);
+	bch_check_key_order(b, i);
+
 	cancel_delayed_work(&b->work);
 
+	/* If caller isn't waiting for write, parent refcount is cache set */
+	closure_lock(&b->io, parent ?: &b->c->cl);
+
 	clear_bit(BTREE_NODE_dirty,	 &b->flags);
 	change_bit(BTREE_NODE_write_idx, &b->flags);
 
-	bch_check_key_order(b, i);
-	BUG_ON(b->written && !i->keys);
-
-	do_btree_write(b);
-
-	pr_debug("%s block %i keys %i", pbtree(b), b->written, i->keys);
+	do_btree_node_write(b);
 
 	b->written += set_blocks(i, b->c);
 	atomic_long_add(set_blocks(i, b->c) * b->c->sb.block_size,
@@ -387,37 +405,31 @@
 		bch_bset_init_next(b);
 }
 
-static void btree_write_work(struct work_struct *w)
+static void btree_node_write_work(struct work_struct *w)
 {
 	struct btree *b = container_of(to_delayed_work(w), struct btree, work);
 
-	down_write(&b->lock);
+	rw_lock(true, b, b->level);
 
 	if (btree_node_dirty(b))
-		__btree_write(b);
-	up_write(&b->lock);
+		bch_btree_node_write(b, NULL);
+	rw_unlock(true, b);
 }
 
-void bch_btree_write(struct btree *b, bool now, struct btree_op *op)
+static void bch_btree_leaf_dirty(struct btree *b, struct btree_op *op)
 {
 	struct bset *i = b->sets[b->nsets].data;
 	struct btree_write *w = btree_current_write(b);
 
-	BUG_ON(b->written &&
-	       (b->written >= btree_blocks(b) ||
-		i->seq != b->sets[0].data->seq ||
-		!i->keys));
+	BUG_ON(!b->written);
+	BUG_ON(!i->keys);
 
-	if (!btree_node_dirty(b)) {
-		set_btree_node_dirty(b);
-		queue_delayed_work(btree_io_wq, &b->work,
-				   msecs_to_jiffies(30000));
-	}
+	if (!btree_node_dirty(b))
+		queue_delayed_work(btree_io_wq, &b->work, 30 * HZ);
 
-	w->prio_blocked += b->prio_blocked;
-	b->prio_blocked = 0;
+	set_btree_node_dirty(b);
 
-	if (op && op->journal && !b->level) {
+	if (op && op->journal) {
 		if (w->journal &&
 		    journal_pin_cmp(b->c, w, op)) {
 			atomic_dec_bug(w->journal);
@@ -430,23 +442,10 @@
 		}
 	}
 
-	if (current->bio_list)
-		return;
-
 	/* Force write if set is too big */
-	if (now ||
-	    b->level ||
-	    set_bytes(i) > PAGE_SIZE - 48) {
-		if (op && now) {
-			/* Must wait on multiple writes */
-			BUG_ON(w->owner);
-			w->owner = &op->cl;
-			closure_get(&op->cl);
-		}
-
-		__btree_write(b);
-	}
-	BUG_ON(!b->written);
+	if (set_bytes(i) > PAGE_SIZE - 48 &&
+	    !current->bio_list)
+		bch_btree_node_write(b, NULL);
 }
 
 /*
@@ -559,7 +558,7 @@
 	init_rwsem(&b->lock);
 	lockdep_set_novalidate_class(&b->lock);
 	INIT_LIST_HEAD(&b->list);
-	INIT_DELAYED_WORK(&b->work, btree_write_work);
+	INIT_DELAYED_WORK(&b->work, btree_node_write_work);
 	b->c = c;
 	closure_init_unlocked(&b->io);
 
@@ -582,7 +581,7 @@
 	BUG_ON(btree_node_dirty(b) && !b->sets[0].data);
 
 	if (cl && btree_node_dirty(b))
-		bch_btree_write(b, true, NULL);
+		bch_btree_node_write(b, NULL);
 
 	if (cl)
 		closure_wait_event_async(&b->io.wait, cl,
@@ -623,6 +622,13 @@
 	else if (!mutex_trylock(&c->bucket_lock))
 		return -1;
 
+	/*
+	 * It's _really_ critical that we don't free too many btree nodes - we
+	 * have to always leave ourselves a reserve. The reserve is how we
+	 * guarantee that allocating memory for a new btree node can always
+	 * succeed, so that inserting keys into the btree can always succeed and
+	 * IO can always make forward progress:
+	 */
 	nr /= c->btree_pages;
 	nr = min_t(unsigned long, nr, mca_can_free(c));
 
@@ -766,6 +772,8 @@
 	int ret = -ENOMEM;
 	struct btree *i;
 
+	trace_bcache_btree_cache_cannibalize(c);
+
 	if (!cl)
 		return ERR_PTR(-ENOMEM);
 
@@ -784,7 +792,6 @@
 		return ERR_PTR(-EAGAIN);
 	}
 
-	/* XXX: tracepoint */
 	c->try_harder = cl;
 	c->try_harder_start = local_clock();
 retry:
@@ -905,6 +912,9 @@
 	b = mca_find(c, k);
 
 	if (!b) {
+		if (current->bio_list)
+			return ERR_PTR(-EAGAIN);
+
 		mutex_lock(&c->bucket_lock);
 		b = mca_alloc(c, k, level, &op->cl);
 		mutex_unlock(&c->bucket_lock);
@@ -914,7 +924,7 @@
 		if (IS_ERR(b))
 			return b;
 
-		bch_btree_read(b);
+		bch_btree_node_read(b);
 
 		if (!write)
 			downgrade_write(&b->lock);
@@ -937,15 +947,12 @@
 	for (; i <= b->nsets; i++)
 		prefetch(b->sets[i].data);
 
-	if (!closure_wait_event(&b->io.wait, &op->cl,
-				btree_node_read_done(b))) {
+	if (btree_node_io_error(b)) {
 		rw_unlock(write, b);
-		b = ERR_PTR(-EAGAIN);
-	} else if (btree_node_io_error(b)) {
-		rw_unlock(write, b);
-		b = ERR_PTR(-EIO);
-	} else
-		BUG_ON(!b->written);
+		return ERR_PTR(-EIO);
+	}
+
+	BUG_ON(!b->written);
 
 	return b;
 }
@@ -959,7 +966,7 @@
 	mutex_unlock(&c->bucket_lock);
 
 	if (!IS_ERR_OR_NULL(b)) {
-		bch_btree_read(b);
+		bch_btree_node_read(b);
 		rw_unlock(true, b);
 	}
 }
@@ -970,24 +977,19 @@
 {
 	unsigned i;
 
+	trace_bcache_btree_node_free(b);
+
 	/*
 	 * The BUG_ON() in btree_node_get() implies that we must have a write
 	 * lock on parent to free or even invalidate a node
 	 */
 	BUG_ON(op->lock <= b->level);
 	BUG_ON(b == b->c->root);
-	pr_debug("bucket %s", pbtree(b));
 
 	if (btree_node_dirty(b))
 		btree_complete_write(b, btree_current_write(b));
 	clear_bit(BTREE_NODE_dirty, &b->flags);
 
-	if (b->prio_blocked &&
-	    !atomic_sub_return(b->prio_blocked, &b->c->prio_blocked))
-		wake_up(&b->c->alloc_wait);
-
-	b->prio_blocked = 0;
-
 	cancel_delayed_work(&b->work);
 
 	mutex_lock(&b->c->bucket_lock);
@@ -1028,17 +1030,20 @@
 		goto retry;
 	}
 
-	set_btree_node_read_done(b);
 	b->accessed = 1;
 	bch_bset_init_next(b);
 
 	mutex_unlock(&c->bucket_lock);
+
+	trace_bcache_btree_node_alloc(b);
 	return b;
 err_free:
 	bch_bucket_free(c, &k.key);
 	__bkey_put(c, &k.key);
 err:
 	mutex_unlock(&c->bucket_lock);
+
+	trace_bcache_btree_node_alloc_fail(b);
 	return b;
 }
 
@@ -1137,11 +1142,8 @@
 		gc->nkeys++;
 
 		gc->data += KEY_SIZE(k);
-		if (KEY_DIRTY(k)) {
+		if (KEY_DIRTY(k))
 			gc->dirty += KEY_SIZE(k);
-			if (d)
-				d->sectors_dirty_gc += KEY_SIZE(k);
-		}
 	}
 
 	for (t = b->sets; t <= &b->sets[b->nsets]; t++)
@@ -1166,14 +1168,11 @@
 
 	if (!IS_ERR_OR_NULL(n)) {
 		swap(b, n);
+		__bkey_put(b->c, &b->key);
 
 		memcpy(k->ptr, b->key.ptr,
 		       sizeof(uint64_t) * KEY_PTRS(&b->key));
 
-		__bkey_put(b->c, &b->key);
-		atomic_inc(&b->c->prio_blocked);
-		b->prio_blocked++;
-
 		btree_node_free(n, op);
 		up_write(&n->lock);
 	}
@@ -1278,7 +1277,7 @@
 	btree_node_free(r->b, op);
 	up_write(&r->b->lock);
 
-	pr_debug("coalesced %u nodes", nodes);
+	trace_bcache_btree_gc_coalesce(nodes);
 
 	gc->nodes--;
 	nodes--;
@@ -1293,14 +1292,9 @@
 	void write(struct btree *r)
 	{
 		if (!r->written)
-			bch_btree_write(r, true, op);
-		else if (btree_node_dirty(r)) {
-			BUG_ON(btree_current_write(r)->owner);
-			btree_current_write(r)->owner = writes;
-			closure_get(writes);
-
-			bch_btree_write(r, true, NULL);
-		}
+			bch_btree_node_write(r, &op->cl);
+		else if (btree_node_dirty(r))
+			bch_btree_node_write(r, writes);
 
 		up_write(&r->lock);
 	}
@@ -1386,9 +1380,7 @@
 		ret = btree_gc_recurse(b, op, writes, gc);
 
 	if (!b->written || btree_node_dirty(b)) {
-		atomic_inc(&b->c->prio_blocked);
-		b->prio_blocked++;
-		bch_btree_write(b, true, n ? op : NULL);
+		bch_btree_node_write(b, n ? &op->cl : NULL);
 	}
 
 	if (!IS_ERR_OR_NULL(n)) {
@@ -1405,7 +1397,6 @@
 {
 	struct cache *ca;
 	struct bucket *b;
-	struct bcache_device **d;
 	unsigned i;
 
 	if (!c->gc_mark_valid)
@@ -1419,16 +1410,12 @@
 	for_each_cache(ca, c, i)
 		for_each_bucket(b, ca) {
 			b->gc_gen = b->gen;
-			if (!atomic_read(&b->pin))
+			if (!atomic_read(&b->pin)) {
 				SET_GC_MARK(b, GC_MARK_RECLAIMABLE);
+				SET_GC_SECTORS_USED(b, 0);
+			}
 		}
 
-	for (d = c->devices;
-	     d < c->devices + c->nr_uuids;
-	     d++)
-		if (*d)
-			(*d)->sectors_dirty_gc = 0;
-
 	mutex_unlock(&c->bucket_lock);
 }
 
@@ -1437,7 +1424,6 @@
 	size_t available = 0;
 	struct bucket *b;
 	struct cache *ca;
-	struct bcache_device **d;
 	unsigned i;
 
 	mutex_lock(&c->bucket_lock);
@@ -1480,22 +1466,6 @@
 		}
 	}
 
-	for (d = c->devices;
-	     d < c->devices + c->nr_uuids;
-	     d++)
-		if (*d) {
-			unsigned long last =
-				atomic_long_read(&((*d)->sectors_dirty));
-			long difference = (*d)->sectors_dirty_gc - last;
-
-			pr_debug("sectors dirty off by %li", difference);
-
-			(*d)->sectors_dirty_last += difference;
-
-			atomic_long_set(&((*d)->sectors_dirty),
-					(*d)->sectors_dirty_gc);
-		}
-
 	mutex_unlock(&c->bucket_lock);
 	return available;
 }
@@ -1508,10 +1478,9 @@
 	struct gc_stat stats;
 	struct closure writes;
 	struct btree_op op;
-
 	uint64_t start_time = local_clock();
-	trace_bcache_gc_start(c->sb.set_uuid);
-	blktrace_msg_all(c, "Starting gc");
+
+	trace_bcache_gc_start(c);
 
 	memset(&stats, 0, sizeof(struct gc_stat));
 	closure_init_stack(&writes);
@@ -1520,14 +1489,14 @@
 
 	btree_gc_start(c);
 
+	atomic_inc(&c->prio_blocked);
+
 	ret = btree_root(gc_root, c, &op, &writes, &stats);
 	closure_sync(&op.cl);
 	closure_sync(&writes);
 
 	if (ret) {
-		blktrace_msg_all(c, "Stopped gc");
 		pr_warn("gc failed!");
-
 		continue_at(cl, bch_btree_gc, bch_gc_wq);
 	}
 
@@ -1537,6 +1506,9 @@
 
 	available = bch_btree_gc_finish(c);
 
+	atomic_dec(&c->prio_blocked);
+	wake_up_allocators(c);
+
 	bch_time_stats_update(&c->btree_gc_time, start_time);
 
 	stats.key_bytes *= sizeof(uint64_t);
@@ -1544,10 +1516,8 @@
 	stats.data	<<= 9;
 	stats.in_use	= (c->nbuckets - available) * 100 / c->nbuckets;
 	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
-	blktrace_msg_all(c, "Finished gc");
 
-	trace_bcache_gc_end(c->sb.set_uuid);
-	wake_up(&c->alloc_wait);
+	trace_bcache_gc_end(c);
 
 	continue_at(cl, bch_moving_gc, bch_gc_wq);
 }
@@ -1654,14 +1624,14 @@
 				    struct btree_iter *iter,
 				    struct btree_op *op)
 {
-	void subtract_dirty(struct bkey *k, int sectors)
+	void subtract_dirty(struct bkey *k, uint64_t offset, int sectors)
 	{
-		struct bcache_device *d = b->c->devices[KEY_INODE(k)];
-
-		if (KEY_DIRTY(k) && d)
-			atomic_long_sub(sectors, &d->sectors_dirty);
+		if (KEY_DIRTY(k))
+			bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+						     offset, -sectors);
 	}
 
+	uint64_t old_offset;
 	unsigned old_size, sectors_found = 0;
 
 	while (1) {
@@ -1673,6 +1643,7 @@
 		if (bkey_cmp(k, &START_KEY(insert)) <= 0)
 			continue;
 
+		old_offset = KEY_START(k);
 		old_size = KEY_SIZE(k);
 
 		/*
@@ -1728,7 +1699,7 @@
 
 			struct bkey *top;
 
-			subtract_dirty(k, KEY_SIZE(insert));
+			subtract_dirty(k, KEY_START(insert), KEY_SIZE(insert));
 
 			if (bkey_written(b, k)) {
 				/*
@@ -1775,7 +1746,7 @@
 			}
 		}
 
-		subtract_dirty(k, old_size - KEY_SIZE(k));
+		subtract_dirty(k, old_offset, old_size - KEY_SIZE(k));
 	}
 
 check_failed:
@@ -1798,7 +1769,7 @@
 {
 	struct bset *i = b->sets[b->nsets].data;
 	struct bkey *m, *prev;
-	const char *status = "insert";
+	unsigned status = BTREE_INSERT_STATUS_INSERT;
 
 	BUG_ON(bkey_cmp(k, &b->key) > 0);
 	BUG_ON(b->level && !KEY_PTRS(k));
@@ -1831,17 +1802,17 @@
 			goto insert;
 
 		/* prev is in the tree, if we merge we're done */
-		status = "back merging";
+		status = BTREE_INSERT_STATUS_BACK_MERGE;
 		if (prev &&
 		    bch_bkey_try_merge(b, prev, k))
 			goto merged;
 
-		status = "overwrote front";
+		status = BTREE_INSERT_STATUS_OVERWROTE;
 		if (m != end(i) &&
 		    KEY_PTRS(m) == KEY_PTRS(k) && !KEY_SIZE(m))
 			goto copy;
 
-		status = "front merge";
+		status = BTREE_INSERT_STATUS_FRONT_MERGE;
 		if (m != end(i) &&
 		    bch_bkey_try_merge(b, k, m))
 			goto copy;
@@ -1851,21 +1822,21 @@
 insert:	shift_keys(b, m, k);
 copy:	bkey_copy(m, k);
 merged:
-	bch_check_keys(b, "%s for %s at %s: %s", status,
-		       op_type(op), pbtree(b), pkey(k));
-	bch_check_key_order_msg(b, i, "%s for %s at %s: %s", status,
-				op_type(op), pbtree(b), pkey(k));
+	if (KEY_DIRTY(k))
+		bcache_dev_sectors_dirty_add(b->c, KEY_INODE(k),
+					     KEY_START(k), KEY_SIZE(k));
+
+	bch_check_keys(b, "%u for %s", status, op_type(op));
 
 	if (b->level && !KEY_OFFSET(k))
-		b->prio_blocked++;
+		btree_current_write(b)->prio_blocked++;
 
-	pr_debug("%s for %s at %s: %s", status,
-		 op_type(op), pbtree(b), pkey(k));
+	trace_bcache_btree_insert_key(b, k, op->type, status);
 
 	return true;
 }
 
-bool bch_btree_insert_keys(struct btree *b, struct btree_op *op)
+static bool bch_btree_insert_keys(struct btree *b, struct btree_op *op)
 {
 	bool ret = false;
 	struct bkey *k;
@@ -1896,7 +1867,7 @@
 	    should_split(b))
 		goto out;
 
-	op->replace = KEY(op->inode, bio_end(bio), bio_sectors(bio));
+	op->replace = KEY(op->inode, bio_end_sector(bio), bio_sectors(bio));
 
 	SET_KEY_PTRS(&op->replace, 1);
 	get_random_bytes(&op->replace.ptr[0], sizeof(uint64_t));
@@ -1907,7 +1878,6 @@
 
 	BUG_ON(op->type != BTREE_INSERT);
 	BUG_ON(!btree_insert_key(b, op, &tmp.k));
-	bch_btree_write(b, false, NULL);
 	ret = true;
 out:
 	downgrade_write(&b->lock);
@@ -1929,12 +1899,11 @@
 
 	split = set_blocks(n1->sets[0].data, n1->c) > (btree_blocks(b) * 4) / 5;
 
-	pr_debug("%ssplitting at %s keys %i", split ? "" : "not ",
-		 pbtree(b), n1->sets[0].data->keys);
-
 	if (split) {
 		unsigned keys = 0;
 
+		trace_bcache_btree_node_split(b, n1->sets[0].data->keys);
+
 		n2 = bch_btree_node_alloc(b->c, b->level, &op->cl);
 		if (IS_ERR(n2))
 			goto err_free1;
@@ -1967,18 +1936,21 @@
 		bkey_copy_key(&n2->key, &b->key);
 
 		bch_keylist_add(&op->keys, &n2->key);
-		bch_btree_write(n2, true, op);
+		bch_btree_node_write(n2, &op->cl);
 		rw_unlock(true, n2);
-	} else
+	} else {
+		trace_bcache_btree_node_compact(b, n1->sets[0].data->keys);
+
 		bch_btree_insert_keys(n1, op);
+	}
 
 	bch_keylist_add(&op->keys, &n1->key);
-	bch_btree_write(n1, true, op);
+	bch_btree_node_write(n1, &op->cl);
 
 	if (n3) {
 		bkey_copy_key(&n3->key, &MAX_KEY);
 		bch_btree_insert_keys(n3, op);
-		bch_btree_write(n3, true, op);
+		bch_btree_node_write(n3, &op->cl);
 
 		closure_sync(&op->cl);
 		bch_btree_set_root(n3);
@@ -2082,8 +2054,12 @@
 
 		BUG_ON(write_block(b) != b->sets[b->nsets].data);
 
-		if (bch_btree_insert_keys(b, op))
-			bch_btree_write(b, false, op);
+		if (bch_btree_insert_keys(b, op)) {
+			if (!b->level)
+				bch_btree_leaf_dirty(b, op);
+			else
+				bch_btree_node_write(b, &op->cl);
+		}
 	}
 
 	return 0;
@@ -2140,6 +2116,11 @@
 void bch_btree_set_root(struct btree *b)
 {
 	unsigned i;
+	struct closure cl;
+
+	closure_init_stack(&cl);
+
+	trace_bcache_btree_set_root(b);
 
 	BUG_ON(!b->written);
 
@@ -2153,8 +2134,8 @@
 	b->c->root = b;
 	__bkey_put(b->c, &b->key);
 
-	bch_journal_meta(b->c, NULL);
-	pr_debug("%s for %pf", pbtree(b), __builtin_return_address(0));
+	bch_journal_meta(b->c, &cl);
+	closure_sync(&cl);
 }
 
 /* Cache lookup */
@@ -2215,9 +2196,6 @@
 					 KEY_OFFSET(k) - bio->bi_sector);
 
 		n = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
-		if (!n)
-			return -EAGAIN;
-
 		if (n == bio)
 			op->lookup_done = true;
 
@@ -2240,7 +2218,6 @@
 		n->bi_end_io	= bch_cache_read_endio;
 		n->bi_private	= &s->cl;
 
-		trace_bcache_cache_hit(n);
 		__bch_submit_bbio(n, b->c);
 	}
 
@@ -2257,9 +2234,6 @@
 	struct btree_iter iter;
 	bch_btree_iter_init(b, &iter, &KEY(op->inode, bio->bi_sector, 0));
 
-	pr_debug("at %s searching for %u:%llu", pbtree(b), op->inode,
-		 (uint64_t) bio->bi_sector);
-
 	do {
 		k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad);
 		if (!k) {
@@ -2303,7 +2277,8 @@
 }
 
 static int bch_btree_refill_keybuf(struct btree *b, struct btree_op *op,
-				   struct keybuf *buf, struct bkey *end)
+				   struct keybuf *buf, struct bkey *end,
+				   keybuf_pred_fn *pred)
 {
 	struct btree_iter iter;
 	bch_btree_iter_init(b, &iter, &buf->last_scanned);
@@ -2322,11 +2297,9 @@
 			if (bkey_cmp(&buf->last_scanned, end) >= 0)
 				break;
 
-			if (buf->key_predicate(buf, k)) {
+			if (pred(buf, k)) {
 				struct keybuf_key *w;
 
-				pr_debug("%s", pkey(k));
-
 				spin_lock(&buf->lock);
 
 				w = array_alloc(&buf->freelist);
@@ -2343,7 +2316,7 @@
 			if (!k)
 				break;
 
-			btree(refill_keybuf, k, b, op, buf, end);
+			btree(refill_keybuf, k, b, op, buf, end, pred);
 			/*
 			 * Might get an error here, but can't really do anything
 			 * and it'll get logged elsewhere. Just read what we
@@ -2361,7 +2334,7 @@
 }
 
 void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
-			  struct bkey *end)
+		       struct bkey *end, keybuf_pred_fn *pred)
 {
 	struct bkey start = buf->last_scanned;
 	struct btree_op op;
@@ -2369,7 +2342,7 @@
 
 	cond_resched();
 
-	btree_root(refill_keybuf, c, &op, buf, end);
+	btree_root(refill_keybuf, c, &op, buf, end, pred);
 	closure_sync(&op.cl);
 
 	pr_debug("found %s keys from %llu:%llu to %llu:%llu",
@@ -2455,7 +2428,8 @@
 
 struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
 					     struct keybuf *buf,
-					     struct bkey *end)
+					     struct bkey *end,
+					     keybuf_pred_fn *pred)
 {
 	struct keybuf_key *ret;
 
@@ -2469,15 +2443,14 @@
 			break;
 		}
 
-		bch_refill_keybuf(c, buf, end);
+		bch_refill_keybuf(c, buf, end, pred);
 	}
 
 	return ret;
 }
 
-void bch_keybuf_init(struct keybuf *buf, keybuf_pred_fn *fn)
+void bch_keybuf_init(struct keybuf *buf)
 {
-	buf->key_predicate	= fn;
 	buf->last_scanned	= MAX_KEY;
 	buf->keys		= RB_ROOT;
 
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index af4a709..3333d37 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -102,7 +102,6 @@
 #include "debug.h"
 
 struct btree_write {
-	struct closure		*owner;
 	atomic_t		*journal;
 
 	/* If btree_split() frees a btree node, it writes a new pointer to that
@@ -142,16 +141,12 @@
 	 */
 	struct bset_tree	sets[MAX_BSETS];
 
-	/* Used to refcount bio splits, also protects b->bio */
+	/* For outstanding btree writes, used as a lock - protects write_idx */
 	struct closure_with_waitlist	io;
 
-	/* Gets transferred to w->prio_blocked - see the comment there */
-	int			prio_blocked;
-
 	struct list_head	list;
 	struct delayed_work	work;
 
-	uint64_t		io_start_time;
 	struct btree_write	writes[2];
 	struct bio		*bio;
 };
@@ -164,13 +159,11 @@
 {	set_bit(BTREE_NODE_ ## flag, &b->flags); }			\
 
 enum btree_flags {
-	BTREE_NODE_read_done,
 	BTREE_NODE_io_error,
 	BTREE_NODE_dirty,
 	BTREE_NODE_write_idx,
 };
 
-BTREE_FLAG(read_done);
 BTREE_FLAG(io_error);
 BTREE_FLAG(dirty);
 BTREE_FLAG(write_idx);
@@ -278,6 +271,13 @@
 	BKEY_PADDED(replace);
 };
 
+enum {
+	BTREE_INSERT_STATUS_INSERT,
+	BTREE_INSERT_STATUS_BACK_MERGE,
+	BTREE_INSERT_STATUS_OVERWROTE,
+	BTREE_INSERT_STATUS_FRONT_MERGE,
+};
+
 void bch_btree_op_init_stack(struct btree_op *);
 
 static inline void rw_lock(bool w, struct btree *b, int level)
@@ -293,9 +293,7 @@
 #ifdef CONFIG_BCACHE_EDEBUG
 	unsigned i;
 
-	if (w &&
-	    b->key.ptr[0] &&
-	    btree_node_read_done(b))
+	if (w && b->key.ptr[0])
 		for (i = 0; i <= b->nsets; i++)
 			bch_check_key_order(b, b->sets[i].data);
 #endif
@@ -370,9 +368,8 @@
 		 > btree_blocks(b));
 }
 
-void bch_btree_read_done(struct closure *);
-void bch_btree_read(struct btree *);
-void bch_btree_write(struct btree *b, bool now, struct btree_op *op);
+void bch_btree_node_read(struct btree *);
+void bch_btree_node_write(struct btree *, struct closure *);
 
 void bch_cannibalize_unlock(struct cache_set *, struct closure *);
 void bch_btree_set_root(struct btree *);
@@ -380,7 +377,6 @@
 struct btree *bch_btree_node_get(struct cache_set *, struct bkey *,
 				int, struct btree_op *);
 
-bool bch_btree_insert_keys(struct btree *, struct btree_op *);
 bool bch_btree_insert_check_key(struct btree *, struct btree_op *,
 				   struct bio *);
 int bch_btree_insert(struct btree_op *, struct cache_set *);
@@ -393,13 +389,14 @@
 int bch_btree_check(struct cache_set *, struct btree_op *);
 uint8_t __bch_btree_mark_key(struct cache_set *, int, struct bkey *);
 
-void bch_keybuf_init(struct keybuf *, keybuf_pred_fn *);
-void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *);
+void bch_keybuf_init(struct keybuf *);
+void bch_refill_keybuf(struct cache_set *, struct keybuf *, struct bkey *,
+		       keybuf_pred_fn *);
 bool bch_keybuf_check_overlapping(struct keybuf *, struct bkey *,
 				  struct bkey *);
 void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
 struct keybuf_key *bch_keybuf_next(struct keybuf *);
-struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *,
-					  struct keybuf *, struct bkey *);
+struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
+					  struct bkey *, keybuf_pred_fn *);
 
 #endif
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index bd05a9a..9aba201 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -66,16 +66,18 @@
 		} else {
 			struct closure *parent = cl->parent;
 			struct closure_waitlist *wait = closure_waitlist(cl);
+			closure_fn *destructor = cl->fn;
 
 			closure_debug_destroy(cl);
 
+			smp_mb();
 			atomic_set(&cl->remaining, -1);
 
 			if (wait)
 				closure_wake_up(wait);
 
-			if (cl->fn)
-				cl->fn(cl);
+			if (destructor)
+				destructor(cl);
 
 			if (parent)
 				closure_put(parent);
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index 89fd520..88e6411 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -47,11 +47,10 @@
 	return "";
 }
 
-struct keyprint_hack bch_pkey(const struct bkey *k)
+int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k)
 {
 	unsigned i = 0;
-	struct keyprint_hack r;
-	char *out = r.s, *end = r.s + KEYHACK_SIZE;
+	char *out = buf, *end = buf + size;
 
 #define p(...)	(out += scnprintf(out, end - out, __VA_ARGS__))
 
@@ -75,16 +74,14 @@
 	if (KEY_CSUM(k))
 		p(" cs%llu %llx", KEY_CSUM(k), k->ptr[1]);
 #undef p
-	return r;
+	return out - buf;
 }
 
-struct keyprint_hack bch_pbtree(const struct btree *b)
+int bch_btree_to_text(char *buf, size_t size, const struct btree *b)
 {
-	struct keyprint_hack r;
-
-	snprintf(r.s, 40, "%zu level %i/%i", PTR_BUCKET_NR(b->c, &b->key, 0),
-		 b->level, b->c->root ? b->c->root->level : -1);
-	return r;
+	return scnprintf(buf, size, "%zu level %i/%i",
+			 PTR_BUCKET_NR(b->c, &b->key, 0),
+			 b->level, b->c->root ? b->c->root->level : -1);
 }
 
 #if defined(CONFIG_BCACHE_DEBUG) || defined(CONFIG_BCACHE_EDEBUG)
@@ -100,10 +97,12 @@
 {
 	struct bkey *k;
 	unsigned j;
+	char buf[80];
 
 	for (k = i->start; k < end(i); k = bkey_next(k)) {
+		bch_bkey_to_text(buf, sizeof(buf), k);
 		printk(KERN_ERR "block %zu key %zi/%u: %s", index(i, b),
-		       (uint64_t *) k - i->d, i->keys, pkey(k));
+		       (uint64_t *) k - i->d, i->keys, buf);
 
 		for (j = 0; j < KEY_PTRS(k); j++) {
 			size_t n = PTR_BUCKET_NR(b->c, k, j);
@@ -144,7 +143,7 @@
 	v->written = 0;
 	v->level = b->level;
 
-	bch_btree_read(v);
+	bch_btree_node_read(v);
 	closure_wait_event(&v->io.wait, &cl,
 			   atomic_read(&b->io.cl.remaining) == -1);
 
@@ -200,7 +199,7 @@
 	if (!check)
 		return;
 
-	if (bch_bio_alloc_pages(check, GFP_NOIO))
+	if (bio_alloc_pages(check, GFP_NOIO))
 		goto out_put;
 
 	check->bi_rw		= READ_SYNC;
@@ -252,6 +251,7 @@
 				   va_list args)
 {
 	unsigned i;
+	char buf[80];
 
 	console_lock();
 
@@ -262,7 +262,8 @@
 
 	console_unlock();
 
-	panic("at %s\n", pbtree(b));
+	bch_btree_to_text(buf, sizeof(buf), b);
+	panic("at %s\n", buf);
 }
 
 void bch_check_key_order_msg(struct btree *b, struct bset *i,
@@ -337,6 +338,7 @@
 {
 	struct dump_iterator *i = file->private_data;
 	ssize_t ret = 0;
+	char kbuf[80];
 
 	while (size) {
 		struct keybuf_key *w;
@@ -355,11 +357,12 @@
 		if (i->bytes)
 			break;
 
-		w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY);
+		w = bch_keybuf_next_rescan(i->c, &i->keys, &MAX_KEY, dump_pred);
 		if (!w)
 			break;
 
-		i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", pkey(&w->key));
+		bch_bkey_to_text(kbuf, sizeof(kbuf), &w->key);
+		i->bytes = snprintf(i->buf, PAGE_SIZE, "%s\n", kbuf);
 		bch_keybuf_del(&i->keys, w);
 	}
 
@@ -377,7 +380,7 @@
 
 	file->private_data = i;
 	i->c = c;
-	bch_keybuf_init(&i->keys, dump_pred);
+	bch_keybuf_init(&i->keys);
 	i->keys.last_scanned = KEY(0, 0, 0);
 
 	return 0;
@@ -409,142 +412,6 @@
 
 #endif
 
-/* Fuzz tester has rotted: */
-#if 0
-
-static ssize_t btree_fuzz(struct kobject *k, struct kobj_attribute *a,
-			  const char *buffer, size_t size)
-{
-	void dump(struct btree *b)
-	{
-		struct bset *i;
-
-		for (i = b->sets[0].data;
-		     index(i, b) < btree_blocks(b) &&
-		     i->seq == b->sets[0].data->seq;
-		     i = ((void *) i) + set_blocks(i, b->c) * block_bytes(b->c))
-			dump_bset(b, i);
-	}
-
-	struct cache_sb *sb;
-	struct cache_set *c;
-	struct btree *all[3], *b, *fill, *orig;
-	int j;
-
-	struct btree_op op;
-	bch_btree_op_init_stack(&op);
-
-	sb = kzalloc(sizeof(struct cache_sb), GFP_KERNEL);
-	if (!sb)
-		return -ENOMEM;
-
-	sb->bucket_size = 128;
-	sb->block_size = 4;
-
-	c = bch_cache_set_alloc(sb);
-	if (!c)
-		return -ENOMEM;
-
-	for (j = 0; j < 3; j++) {
-		BUG_ON(list_empty(&c->btree_cache));
-		all[j] = list_first_entry(&c->btree_cache, struct btree, list);
-		list_del_init(&all[j]->list);
-
-		all[j]->key = KEY(0, 0, c->sb.bucket_size);
-		bkey_copy_key(&all[j]->key, &MAX_KEY);
-	}
-
-	b = all[0];
-	fill = all[1];
-	orig = all[2];
-
-	while (1) {
-		for (j = 0; j < 3; j++)
-			all[j]->written = all[j]->nsets = 0;
-
-		bch_bset_init_next(b);
-
-		while (1) {
-			struct bset *i = write_block(b);
-			struct bkey *k = op.keys.top;
-			unsigned rand;
-
-			bkey_init(k);
-			rand = get_random_int();
-
-			op.type = rand & 1
-				? BTREE_INSERT
-				: BTREE_REPLACE;
-			rand >>= 1;
-
-			SET_KEY_SIZE(k, bucket_remainder(c, rand));
-			rand >>= c->bucket_bits;
-			rand &= 1024 * 512 - 1;
-			rand += c->sb.bucket_size;
-			SET_KEY_OFFSET(k, rand);
-#if 0
-			SET_KEY_PTRS(k, 1);
-#endif
-			bch_keylist_push(&op.keys);
-			bch_btree_insert_keys(b, &op);
-
-			if (should_split(b) ||
-			    set_blocks(i, b->c) !=
-			    __set_blocks(i, i->keys + 15, b->c)) {
-				i->csum = csum_set(i);
-
-				memcpy(write_block(fill),
-				       i, set_bytes(i));
-
-				b->written += set_blocks(i, b->c);
-				fill->written = b->written;
-				if (b->written == btree_blocks(b))
-					break;
-
-				bch_btree_sort_lazy(b);
-				bch_bset_init_next(b);
-			}
-		}
-
-		memcpy(orig->sets[0].data,
-		       fill->sets[0].data,
-		       btree_bytes(c));
-
-		bch_btree_sort(b);
-		fill->written = 0;
-		bch_btree_read_done(&fill->io.cl);
-
-		if (b->sets[0].data->keys != fill->sets[0].data->keys ||
-		    memcmp(b->sets[0].data->start,
-			   fill->sets[0].data->start,
-			   b->sets[0].data->keys * sizeof(uint64_t))) {
-			struct bset *i = b->sets[0].data;
-			struct bkey *k, *l;
-
-			for (k = i->start,
-			     l = fill->sets[0].data->start;
-			     k < end(i);
-			     k = bkey_next(k), l = bkey_next(l))
-				if (bkey_cmp(k, l) ||
-				    KEY_SIZE(k) != KEY_SIZE(l))
-					pr_err("key %zi differs: %s != %s",
-					       (uint64_t *) k - i->d,
-					       pkey(k), pkey(l));
-
-			for (j = 0; j < 3; j++) {
-				pr_err("**** Set %i ****", j);
-				dump(all[j]);
-			}
-			panic("\n");
-		}
-
-		pr_info("fuzz complete: %i keys", b->sets[0].data->keys);
-	}
-}
-
-kobj_attribute_write(fuzz, btree_fuzz);
-#endif
-
 void bch_debug_exit(void)
 {
 	if (!IS_ERR_OR_NULL(debug))
@@ -554,11 +421,6 @@
 int __init bch_debug_init(struct kobject *kobj)
 {
 	int ret = 0;
-#if 0
-	ret = sysfs_create_file(kobj, &ksysfs_fuzz.attr);
-	if (ret)
-		return ret;
-#endif
 
 	debug = debugfs_create_dir("bcache", NULL);
 	return ret;
diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h
index f9378a2..1c39b5a 100644
--- a/drivers/md/bcache/debug.h
+++ b/drivers/md/bcache/debug.h
@@ -3,15 +3,8 @@
 
 /* Btree/bkey debug printing */
 
-#define KEYHACK_SIZE 80
-struct keyprint_hack {
-	char s[KEYHACK_SIZE];
-};
-
-struct keyprint_hack bch_pkey(const struct bkey *k);
-struct keyprint_hack bch_pbtree(const struct btree *b);
-#define pkey(k)		(&bch_pkey(k).s[0])
-#define pbtree(b)	(&bch_pbtree(b).s[0])
+int bch_bkey_to_text(char *buf, size_t size, const struct bkey *k);
+int bch_btree_to_text(char *buf, size_t size, const struct btree *b);
 
 #ifdef CONFIG_BCACHE_EDEBUG
 
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 48efd4d..9056632 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -9,6 +9,8 @@
 #include "bset.h"
 #include "debug.h"
 
+#include <linux/blkdev.h>
+
 static void bch_bi_idx_hack_endio(struct bio *bio, int error)
 {
 	struct bio *p = bio->bi_private;
@@ -66,13 +68,6 @@
  * The newly allocated bio will point to @bio's bi_io_vec, if the split was on a
  * bvec boundry; it is the caller's responsibility to ensure that @bio is not
  * freed before the split.
- *
- * If bch_bio_split() is running under generic_make_request(), it's not safe to
- * allocate more than one bio from the same bio set. Therefore, if it is running
- * under generic_make_request() it masks out __GFP_WAIT when doing the
- * allocation. The caller must check for failure if there's any possibility of
- * it being called from under generic_make_request(); it is then the caller's
- * responsibility to retry from a safe context (by e.g. punting to workqueue).
  */
 struct bio *bch_bio_split(struct bio *bio, int sectors,
 			  gfp_t gfp, struct bio_set *bs)
@@ -83,20 +78,13 @@
 
 	BUG_ON(sectors <= 0);
 
-	/*
-	 * If we're being called from underneath generic_make_request() and we
-	 * already allocated any bios from this bio set, we risk deadlock if we
-	 * use the mempool. So instead, we possibly fail and let the caller punt
-	 * to workqueue or somesuch and retry in a safe context.
-	 */
-	if (current->bio_list)
-		gfp &= ~__GFP_WAIT;
-
 	if (sectors >= bio_sectors(bio))
 		return bio;
 
 	if (bio->bi_rw & REQ_DISCARD) {
 		ret = bio_alloc_bioset(gfp, 1, bs);
+		if (!ret)
+			return NULL;
 		idx = 0;
 		goto out;
 	}
@@ -160,17 +148,18 @@
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	unsigned max_segments = min_t(unsigned, BIO_MAX_PAGES,
 				      queue_max_segments(q));
-	struct bio_vec *bv, *end = bio_iovec(bio) +
-		min_t(int, bio_segments(bio), max_segments);
 
 	if (bio->bi_rw & REQ_DISCARD)
 		return min(ret, q->limits.max_discard_sectors);
 
 	if (bio_segments(bio) > max_segments ||
 	    q->merge_bvec_fn) {
+		struct bio_vec *bv;
+		int i, seg = 0;
+
 		ret = 0;
 
-		for (bv = bio_iovec(bio); bv < end; bv++) {
+		bio_for_each_segment(bv, bio, i) {
 			struct bvec_merge_data bvm = {
 				.bi_bdev	= bio->bi_bdev,
 				.bi_sector	= bio->bi_sector,
@@ -178,10 +167,14 @@
 				.bi_rw		= bio->bi_rw,
 			};
 
+			if (seg == max_segments)
+				break;
+
 			if (q->merge_bvec_fn &&
 			    q->merge_bvec_fn(q, &bvm, bv) < (int) bv->bv_len)
 				break;
 
+			seg++;
 			ret += bv->bv_len >> 9;
 		}
 	}
@@ -218,30 +211,10 @@
 	closure_put(cl);
 }
 
-static void __bch_bio_submit_split(struct closure *cl)
-{
-	struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl);
-	struct bio *bio = s->bio, *n;
-
-	do {
-		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
-				  GFP_NOIO, s->p->bio_split);
-		if (!n)
-			continue_at(cl, __bch_bio_submit_split, system_wq);
-
-		n->bi_end_io	= bch_bio_submit_split_endio;
-		n->bi_private	= cl;
-
-		closure_get(cl);
-		bch_generic_make_request_hack(n);
-	} while (n != bio);
-
-	continue_at(cl, bch_bio_submit_split_done, NULL);
-}
-
 void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p)
 {
 	struct bio_split_hook *s;
+	struct bio *n;
 
 	if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD))
 		goto submit;
@@ -250,6 +223,7 @@
 		goto submit;
 
 	s = mempool_alloc(p->bio_split_hook, GFP_NOIO);
+	closure_init(&s->cl, NULL);
 
 	s->bio		= bio;
 	s->p		= p;
@@ -257,8 +231,18 @@
 	s->bi_private	= bio->bi_private;
 	bio_get(bio);
 
-	closure_call(&s->cl, __bch_bio_submit_split, NULL, NULL);
-	return;
+	do {
+		n = bch_bio_split(bio, bch_bio_max_sectors(bio),
+				  GFP_NOIO, s->p->bio_split);
+
+		n->bi_end_io	= bch_bio_submit_split_endio;
+		n->bi_private	= &s->cl;
+
+		closure_get(&s->cl);
+		bch_generic_make_request_hack(n);
+	} while (n != bio);
+
+	continue_at(&s->cl, bch_bio_submit_split_done, NULL);
 submit:
 	bch_generic_make_request_hack(bio);
 }
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 8c8dfdc..ba95ab8 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 /*
  * Journal replay/recovery:
  *
@@ -182,9 +184,14 @@
 		pr_debug("starting binary search, l %u r %u", l, r);
 
 		while (l + 1 < r) {
-			m = (l + r) >> 1;
+			seq = list_entry(list->prev, struct journal_replay,
+					 list)->j.seq;
 
-			if (read_bucket(m))
+			m = (l + r) >> 1;
+			read_bucket(m);
+
+			if (seq != list_entry(list->prev, struct journal_replay,
+					      list)->j.seq)
 				l = m;
 			else
 				r = m;
@@ -300,7 +307,8 @@
 		for (k = i->j.start;
 		     k < end(&i->j);
 		     k = bkey_next(k)) {
-			pr_debug("%s", pkey(k));
+			trace_bcache_journal_replay_key(k);
+
 			bkey_copy(op->keys.top, k);
 			bch_keylist_push(&op->keys);
 
@@ -384,7 +392,7 @@
 		return;
 found:
 	if (btree_node_dirty(best))
-		bch_btree_write(best, true, NULL);
+		bch_btree_node_write(best, NULL);
 	rw_unlock(true, best);
 }
 
@@ -617,7 +625,7 @@
 		bio_reset(bio);
 		bio->bi_sector	= PTR_OFFSET(k, i);
 		bio->bi_bdev	= ca->bdev;
-		bio->bi_rw	= REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH;
+		bio->bi_rw	= REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA;
 		bio->bi_size	= sectors << 9;
 
 		bio->bi_end_io	= journal_write_endio;
@@ -712,7 +720,8 @@
 	spin_lock(&c->journal.lock);
 
 	if (journal_full(&c->journal)) {
-		/* XXX: tracepoint */
+		trace_bcache_journal_full(c);
+
 		closure_wait(&c->journal.wait, cl);
 
 		journal_reclaim(c);
@@ -728,13 +737,15 @@
 
 	if (b * c->sb.block_size > PAGE_SECTORS << JSET_BITS ||
 	    b > c->journal.blocks_free) {
-		/* XXX: If we were inserting so many keys that they won't fit in
+		trace_bcache_journal_entry_full(c);
+
+		/*
+		 * XXX: If we were inserting so many keys that they won't fit in
 		 * an _empty_ journal write, we'll deadlock. For now, handle
 		 * this in bch_keylist_realloc() - but something to think about.
 		 */
 		BUG_ON(!w->data->keys);
 
-		/* XXX: tracepoint */
 		BUG_ON(!closure_wait(&w->wait, cl));
 
 		closure_flush(&c->journal.io);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index 8589512..1a3b4f4 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -9,6 +9,8 @@
 #include "debug.h"
 #include "request.h"
 
+#include <trace/events/bcache.h>
+
 struct moving_io {
 	struct keybuf_key	*w;
 	struct search		s;
@@ -44,14 +46,14 @@
 {
 	struct moving_io *io = container_of(cl, struct moving_io, s.cl);
 	struct bio *bio = &io->bio.bio;
-	struct bio_vec *bv = bio_iovec_idx(bio, bio->bi_vcnt);
+	struct bio_vec *bv;
+	int i;
 
-	while (bv-- != bio->bi_io_vec)
+	bio_for_each_segment_all(bv, bio, i)
 		__free_page(bv->bv_page);
 
-	pr_debug("%s %s", io->s.op.insert_collision
-		 ? "collision moving" : "moved",
-		 pkey(&io->w->key));
+	if (io->s.op.insert_collision)
+		trace_bcache_gc_copy_collision(&io->w->key);
 
 	bch_keybuf_del(&io->s.op.c->moving_gc_keys, io->w);
 
@@ -94,8 +96,6 @@
 	struct moving_io *io = container_of(s, struct moving_io, s);
 
 	if (!s->error) {
-		trace_bcache_write_moving(&io->bio.bio);
-
 		moving_init(io);
 
 		io->bio.bio.bi_sector	= KEY_START(&io->w->key);
@@ -122,7 +122,6 @@
 	struct moving_io *io = container_of(s, struct moving_io, s);
 	struct bio *bio = &io->bio.bio;
 
-	trace_bcache_read_moving(bio);
 	bch_submit_bbio(bio, s->op.c, &io->w->key, 0);
 
 	continue_at(cl, write_moving, bch_gc_wq);
@@ -138,7 +137,8 @@
 	/* XXX: if we error, background writeback could stall indefinitely */
 
 	while (!test_bit(CACHE_SET_STOPPING, &c->flags)) {
-		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys, &MAX_KEY);
+		w = bch_keybuf_next_rescan(c, &c->moving_gc_keys,
+					   &MAX_KEY, moving_pred);
 		if (!w)
 			break;
 
@@ -159,10 +159,10 @@
 		bio->bi_rw	= READ;
 		bio->bi_end_io	= read_moving_endio;
 
-		if (bch_bio_alloc_pages(bio, GFP_KERNEL))
+		if (bio_alloc_pages(bio, GFP_KERNEL))
 			goto err;
 
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_gc_copy(&w->key);
 
 		closure_call(&io->s.cl, read_moving_submit, NULL, &c->gc.cl);
 
@@ -250,5 +250,5 @@
 
 void bch_moving_init_cache_set(struct cache_set *c)
 {
-	bch_keybuf_init(&c->moving_gc_keys, moving_pred);
+	bch_keybuf_init(&c->moving_gc_keys);
 }
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index e5ff12e5..786a1a4 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -10,6 +10,7 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
 #include <linux/cgroup.h>
 #include <linux/module.h>
@@ -21,8 +22,6 @@
 
 #define CUTOFF_CACHE_ADD	95
 #define CUTOFF_CACHE_READA	90
-#define CUTOFF_WRITEBACK	50
-#define CUTOFF_WRITEBACK_SYNC	75
 
 struct kmem_cache *bch_search_cache;
 
@@ -489,6 +488,12 @@
 		bch_queue_gc(op->c);
 	}
 
+	/*
+	 * Journal writes are marked REQ_FLUSH; if the original write was a
+	 * flush, it'll wait on the journal write.
+	 */
+	bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA);
+
 	do {
 		unsigned i;
 		struct bkey *k;
@@ -510,10 +515,6 @@
 			goto err;
 
 		n = bch_bio_split(bio, KEY_SIZE(k), GFP_NOIO, split);
-		if (!n) {
-			__bkey_put(op->c, k);
-			continue_at(cl, bch_insert_data_loop, bcache_wq);
-		}
 
 		n->bi_end_io	= bch_insert_data_endio;
 		n->bi_private	= cl;
@@ -530,10 +531,9 @@
 		if (KEY_CSUM(k))
 			bio_csum(n, k);
 
-		pr_debug("%s", pkey(k));
+		trace_bcache_cache_insert(k);
 		bch_keylist_push(&op->keys);
 
-		trace_bcache_cache_insert(n, n->bi_sector, n->bi_bdev);
 		n->bi_rw |= REQ_WRITE;
 		bch_submit_bbio(n, op->c, k, 0);
 	} while (n != bio);
@@ -716,7 +716,7 @@
 	s->task			= current;
 	s->orig_bio		= bio;
 	s->write		= (bio->bi_rw & REQ_WRITE) != 0;
-	s->op.flush_journal	= (bio->bi_rw & REQ_FLUSH) != 0;
+	s->op.flush_journal	= (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
 	s->op.skip		= (bio->bi_rw & REQ_DISCARD) != 0;
 	s->recoverable		= 1;
 	s->start_time		= jiffies;
@@ -784,11 +784,8 @@
 	int i;
 
 	if (s->recoverable) {
-		/* The cache read failed, but we can retry from the backing
-		 * device.
-		 */
-		pr_debug("recovering at sector %llu",
-			 (uint64_t) s->orig_bio->bi_sector);
+		/* Retry from the backing device: */
+		trace_bcache_read_retry(s->orig_bio);
 
 		s->error = 0;
 		bv = s->bio.bio.bi_io_vec;
@@ -806,7 +803,6 @@
 
 		/* XXX: invalidate cache */
 
-		trace_bcache_read_retry(&s->bio.bio);
 		closure_bio_submit(&s->bio.bio, &s->cl, s->d);
 	}
 
@@ -827,53 +823,13 @@
 	 */
 
 	if (s->op.cache_bio) {
-		struct bio_vec *src, *dst;
-		unsigned src_offset, dst_offset, bytes;
-		void *dst_ptr;
-
 		bio_reset(s->op.cache_bio);
 		s->op.cache_bio->bi_sector	= s->cache_miss->bi_sector;
 		s->op.cache_bio->bi_bdev	= s->cache_miss->bi_bdev;
 		s->op.cache_bio->bi_size	= s->cache_bio_sectors << 9;
 		bch_bio_map(s->op.cache_bio, NULL);
 
-		src = bio_iovec(s->op.cache_bio);
-		dst = bio_iovec(s->cache_miss);
-		src_offset = src->bv_offset;
-		dst_offset = dst->bv_offset;
-		dst_ptr = kmap(dst->bv_page);
-
-		while (1) {
-			if (dst_offset == dst->bv_offset + dst->bv_len) {
-				kunmap(dst->bv_page);
-				dst++;
-				if (dst == bio_iovec_idx(s->cache_miss,
-						s->cache_miss->bi_vcnt))
-					break;
-
-				dst_offset = dst->bv_offset;
-				dst_ptr = kmap(dst->bv_page);
-			}
-
-			if (src_offset == src->bv_offset + src->bv_len) {
-				src++;
-				if (src == bio_iovec_idx(s->op.cache_bio,
-						 s->op.cache_bio->bi_vcnt))
-					BUG();
-
-				src_offset = src->bv_offset;
-			}
-
-			bytes = min(dst->bv_offset + dst->bv_len - dst_offset,
-				    src->bv_offset + src->bv_len - src_offset);
-
-			memcpy(dst_ptr + dst_offset,
-			       page_address(src->bv_page) + src_offset,
-			       bytes);
-
-			src_offset	+= bytes;
-			dst_offset	+= bytes;
-		}
+		bio_copy_data(s->cache_miss, s->op.cache_bio);
 
 		bio_put(s->cache_miss);
 		s->cache_miss = NULL;
@@ -899,6 +855,7 @@
 	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	bch_mark_cache_accounting(s, !s->cache_miss, s->op.skip);
+	trace_bcache_read(s->orig_bio, !s->cache_miss, s->op.skip);
 
 	if (s->error)
 		continue_at_nobarrier(cl, request_read_error, bcache_wq);
@@ -917,9 +874,6 @@
 	struct bio *miss;
 
 	miss = bch_bio_split(bio, sectors, GFP_NOIO, s->d->bio_split);
-	if (!miss)
-		return -EAGAIN;
-
 	if (miss == bio)
 		s->op.lookup_done = true;
 
@@ -938,8 +892,9 @@
 		reada = min(dc->readahead >> 9,
 			    sectors - bio_sectors(miss));
 
-		if (bio_end(miss) + reada > bdev_sectors(miss->bi_bdev))
-			reada = bdev_sectors(miss->bi_bdev) - bio_end(miss);
+		if (bio_end_sector(miss) + reada > bdev_sectors(miss->bi_bdev))
+			reada = bdev_sectors(miss->bi_bdev) -
+				bio_end_sector(miss);
 	}
 
 	s->cache_bio_sectors = bio_sectors(miss) + reada;
@@ -963,13 +918,12 @@
 		goto out_put;
 
 	bch_bio_map(s->op.cache_bio, NULL);
-	if (bch_bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
+	if (bio_alloc_pages(s->op.cache_bio, __GFP_NOWARN|GFP_NOIO))
 		goto out_put;
 
 	s->cache_miss = miss;
 	bio_get(s->op.cache_bio);
 
-	trace_bcache_cache_miss(s->orig_bio);
 	closure_bio_submit(s->op.cache_bio, &s->cl, s->d);
 
 	return ret;
@@ -1002,24 +956,13 @@
 	cached_dev_bio_complete(cl);
 }
 
-static bool should_writeback(struct cached_dev *dc, struct bio *bio)
-{
-	unsigned threshold = (bio->bi_rw & REQ_SYNC)
-		? CUTOFF_WRITEBACK_SYNC
-		: CUTOFF_WRITEBACK;
-
-	return !atomic_read(&dc->disk.detaching) &&
-		cache_mode(dc, bio) == CACHE_MODE_WRITEBACK &&
-		dc->disk.c->gc_stats.in_use < threshold;
-}
-
 static void request_write(struct cached_dev *dc, struct search *s)
 {
 	struct closure *cl = &s->cl;
 	struct bio *bio = &s->bio.bio;
 	struct bkey start, end;
 	start = KEY(dc->disk.id, bio->bi_sector, 0);
-	end = KEY(dc->disk.id, bio_end(bio), 0);
+	end = KEY(dc->disk.id, bio_end_sector(bio), 0);
 
 	bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys, &start, &end);
 
@@ -1034,22 +977,37 @@
 	if (bio->bi_rw & REQ_DISCARD)
 		goto skip;
 
+	if (should_writeback(dc, s->orig_bio,
+			     cache_mode(dc, bio),
+			     s->op.skip)) {
+		s->op.skip = false;
+		s->writeback = true;
+	}
+
 	if (s->op.skip)
 		goto skip;
 
-	if (should_writeback(dc, s->orig_bio))
-		s->writeback = true;
+	trace_bcache_write(s->orig_bio, s->writeback, s->op.skip);
 
 	if (!s->writeback) {
 		s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
 						   dc->disk.bio_split);
 
-		trace_bcache_writethrough(s->orig_bio);
 		closure_bio_submit(bio, cl, s->d);
 	} else {
-		s->op.cache_bio = bio;
-		trace_bcache_writeback(s->orig_bio);
-		bch_writeback_add(dc, bio_sectors(bio));
+		bch_writeback_add(dc);
+
+		if (s->op.flush_journal) {
+			/* Also need to send a flush to the backing device */
+			s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
+							   dc->disk.bio_split);
+
+			bio->bi_size = 0;
+			bio->bi_vcnt = 0;
+			closure_bio_submit(bio, cl, s->d);
+		} else {
+			s->op.cache_bio = bio;
+		}
 	}
 out:
 	closure_call(&s->op.cl, bch_insert_data, NULL, cl);
@@ -1058,7 +1016,6 @@
 	s->op.skip = true;
 	s->op.cache_bio = s->orig_bio;
 	bio_get(s->op.cache_bio);
-	trace_bcache_write_skip(s->orig_bio);
 
 	if ((bio->bi_rw & REQ_DISCARD) &&
 	    !blk_queue_discard(bdev_get_queue(dc->bdev)))
@@ -1088,9 +1045,10 @@
 
 /* Cached devices - read & write stuff */
 
-int bch_get_congested(struct cache_set *c)
+unsigned bch_get_congested(struct cache_set *c)
 {
 	int i;
+	long rand;
 
 	if (!c->congested_read_threshold_us &&
 	    !c->congested_write_threshold_us)
@@ -1106,7 +1064,13 @@
 
 	i += CONGESTED_MAX;
 
-	return i <= 0 ? 1 : fract_exp_two(i, 6);
+	if (i > 0)
+		i = fract_exp_two(i, 6);
+
+	rand = get_random_int();
+	i -= bitmap_weight(&rand, BITS_PER_LONG);
+
+	return i > 0 ? i : 1;
 }
 
 static void add_sequential(struct task_struct *t)
@@ -1126,10 +1090,8 @@
 {
 	struct cache_set *c = s->op.c;
 	struct bio *bio = &s->bio.bio;
-
-	long rand;
-	int cutoff = bch_get_congested(c);
 	unsigned mode = cache_mode(dc, bio);
+	unsigned sectors, congested = bch_get_congested(c);
 
 	if (atomic_read(&dc->disk.detaching) ||
 	    c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
@@ -1147,17 +1109,14 @@
 		goto skip;
 	}
 
-	if (!cutoff) {
-		cutoff = dc->sequential_cutoff >> 9;
+	if (!congested && !dc->sequential_cutoff)
+		goto rescale;
 
-		if (!cutoff)
-			goto rescale;
-
-		if (mode == CACHE_MODE_WRITEBACK &&
-		    (bio->bi_rw & REQ_WRITE) &&
-		    (bio->bi_rw & REQ_SYNC))
-			goto rescale;
-	}
+	if (!congested &&
+	    mode == CACHE_MODE_WRITEBACK &&
+	    (bio->bi_rw & REQ_WRITE) &&
+	    (bio->bi_rw & REQ_SYNC))
+		goto rescale;
 
 	if (dc->sequential_merge) {
 		struct io *i;
@@ -1177,7 +1136,7 @@
 		if (i->sequential + bio->bi_size > i->sequential)
 			i->sequential	+= bio->bi_size;
 
-		i->last			 = bio_end(bio);
+		i->last			 = bio_end_sector(bio);
 		i->jiffies		 = jiffies + msecs_to_jiffies(5000);
 		s->task->sequential_io	 = i->sequential;
 
@@ -1192,12 +1151,19 @@
 		add_sequential(s->task);
 	}
 
-	rand = get_random_int();
-	cutoff -= bitmap_weight(&rand, BITS_PER_LONG);
+	sectors = max(s->task->sequential_io,
+		      s->task->sequential_io_avg) >> 9;
 
-	if (cutoff <= (int) (max(s->task->sequential_io,
-				 s->task->sequential_io_avg) >> 9))
+	if (dc->sequential_cutoff &&
+	    sectors >= dc->sequential_cutoff >> 9) {
+		trace_bcache_bypass_sequential(s->orig_bio);
 		goto skip;
+	}
+
+	if (congested && sectors >= congested) {
+		trace_bcache_bypass_congested(s->orig_bio);
+		goto skip;
+	}
 
 rescale:
 	bch_rescale_priorities(c, bio_sectors(bio));
@@ -1288,30 +1254,25 @@
 static int flash_dev_cache_miss(struct btree *b, struct search *s,
 				struct bio *bio, unsigned sectors)
 {
+	struct bio_vec *bv;
+	int i;
+
 	/* Zero fill bio */
 
-	while (bio->bi_idx != bio->bi_vcnt) {
-		struct bio_vec *bv = bio_iovec(bio);
+	bio_for_each_segment(bv, bio, i) {
 		unsigned j = min(bv->bv_len >> 9, sectors);
 
 		void *p = kmap(bv->bv_page);
 		memset(p + bv->bv_offset, 0, j << 9);
 		kunmap(bv->bv_page);
 
-		bv->bv_len	-= j << 9;
-		bv->bv_offset	+= j << 9;
-
-		if (bv->bv_len)
-			return 0;
-
-		bio->bi_sector	+= j;
-		bio->bi_size	-= j << 9;
-
-		bio->bi_idx++;
-		sectors		-= j;
+		sectors	-= j;
 	}
 
-	s->op.lookup_done = true;
+	bio_advance(bio, min(sectors << 9, bio->bi_size));
+
+	if (!bio->bi_size)
+		s->op.lookup_done = true;
 
 	return 0;
 }
@@ -1338,8 +1299,8 @@
 		closure_call(&s->op.cl, btree_read_async, NULL, cl);
 	} else if (bio_has_data(bio) || s->op.skip) {
 		bch_keybuf_check_overlapping(&s->op.c->moving_gc_keys,
-					     &KEY(d->id, bio->bi_sector, 0),
-					     &KEY(d->id, bio_end(bio), 0));
+					&KEY(d->id, bio->bi_sector, 0),
+					&KEY(d->id, bio_end_sector(bio), 0));
 
 		s->writeback	= true;
 		s->op.cache_bio	= bio;
diff --git a/drivers/md/bcache/request.h b/drivers/md/bcache/request.h
index 254d9ab..57dc478 100644
--- a/drivers/md/bcache/request.h
+++ b/drivers/md/bcache/request.h
@@ -30,7 +30,7 @@
 };
 
 void bch_cache_read_endio(struct bio *, int);
-int bch_get_congested(struct cache_set *);
+unsigned bch_get_congested(struct cache_set *);
 void bch_insert_data(struct closure *cl);
 void bch_btree_insert_async(struct closure *);
 void bch_cache_read_endio(struct bio *, int);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f88e2b6..547c4c5 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -10,10 +10,13 @@
 #include "btree.h"
 #include "debug.h"
 #include "request.h"
+#include "writeback.h"
 
+#include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/debugfs.h>
 #include <linux/genhd.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/reboot.h>
@@ -342,6 +345,7 @@
 	struct closure *cl = &c->uuid_write.cl;
 	struct uuid_entry *u;
 	unsigned i;
+	char buf[80];
 
 	BUG_ON(!parent);
 	closure_lock(&c->uuid_write, parent);
@@ -362,8 +366,8 @@
 			break;
 	}
 
-	pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read",
-		 pkey(&c->uuid_bucket));
+	bch_bkey_to_text(buf, sizeof(buf), k);
+	pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf);
 
 	for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
 		if (!bch_is_zero(u->uuid, 16))
@@ -543,7 +547,6 @@
 
 	pr_debug("free %zu, free_inc %zu, unused %zu", fifo_used(&ca->free),
 		 fifo_used(&ca->free_inc), fifo_used(&ca->unused));
-	blktrace_msg(ca, "Starting priorities: " buckets_free(ca));
 
 	for (i = prio_buckets(ca) - 1; i >= 0; --i) {
 		long bucket;
@@ -704,7 +707,8 @@
 		atomic_set(&d->detaching, 0);
 	}
 
-	bcache_device_unlink(d);
+	if (!d->flush_done)
+		bcache_device_unlink(d);
 
 	d->c->devices[d->id] = NULL;
 	closure_put(&d->c->caching);
@@ -743,13 +747,35 @@
 		mempool_destroy(d->unaligned_bvec);
 	if (d->bio_split)
 		bioset_free(d->bio_split);
+	if (is_vmalloc_addr(d->stripe_sectors_dirty))
+		vfree(d->stripe_sectors_dirty);
+	else
+		kfree(d->stripe_sectors_dirty);
 
 	closure_debug_destroy(&d->cl);
 }
 
-static int bcache_device_init(struct bcache_device *d, unsigned block_size)
+static int bcache_device_init(struct bcache_device *d, unsigned block_size,
+			      sector_t sectors)
 {
 	struct request_queue *q;
+	size_t n;
+
+	if (!d->stripe_size_bits)
+		d->stripe_size_bits = 31;
+
+	d->nr_stripes = round_up(sectors, 1 << d->stripe_size_bits) >>
+		d->stripe_size_bits;
+
+	if (!d->nr_stripes || d->nr_stripes > SIZE_MAX / sizeof(atomic_t))
+		return -ENOMEM;
+
+	n = d->nr_stripes * sizeof(atomic_t);
+	d->stripe_sectors_dirty = n < PAGE_SIZE << 6
+		? kzalloc(n, GFP_KERNEL)
+		: vzalloc(n);
+	if (!d->stripe_sectors_dirty)
+		return -ENOMEM;
 
 	if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
 	    !(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
@@ -759,6 +785,7 @@
 	    !(q = blk_alloc_queue(GFP_KERNEL)))
 		return -ENOMEM;
 
+	set_capacity(d->disk, sectors);
 	snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
 
 	d->disk->major		= bcache_major;
@@ -781,6 +808,8 @@
 	set_bit(QUEUE_FLAG_NONROT,	&d->disk->queue->queue_flags);
 	set_bit(QUEUE_FLAG_DISCARD,	&d->disk->queue->queue_flags);
 
+	blk_queue_flush(q, REQ_FLUSH|REQ_FUA);
+
 	return 0;
 }
 
@@ -800,6 +829,17 @@
 void bch_cached_dev_run(struct cached_dev *dc)
 {
 	struct bcache_device *d = &dc->disk;
+	char buf[SB_LABEL_SIZE + 1];
+	char *env[] = {
+		"DRIVER=bcache",
+		kasprintf(GFP_KERNEL, "CACHED_UUID=%pU", dc->sb.uuid),
+		NULL,
+		NULL,
+	};
+
+	memcpy(buf, dc->sb.label, SB_LABEL_SIZE);
+	buf[SB_LABEL_SIZE] = '\0';
+	env[2] = kasprintf(GFP_KERNEL, "CACHED_LABEL=%s", buf);
 
 	if (atomic_xchg(&dc->running, 1))
 		return;
@@ -816,10 +856,12 @@
 
 	add_disk(d->disk);
 	bd_link_disk_holder(dc->bdev, dc->disk.disk);
-#if 0
-	char *env[] = { "SYMLINK=label" , NULL };
+	/* won't show up in the uevent file, use udevadm monitor -e instead
+	 * only class / kset properties are persistent */
 	kobject_uevent_env(&disk_to_dev(d->disk)->kobj, KOBJ_CHANGE, env);
-#endif
+	kfree(env[1]);
+	kfree(env[2]);
+
 	if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
 	    sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache"))
 		pr_debug("error creating sysfs link");
@@ -960,6 +1002,7 @@
 	atomic_set(&dc->count, 1);
 
 	if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
+		bch_sectors_dirty_init(dc);
 		atomic_set(&dc->has_dirty, 1);
 		atomic_inc(&dc->count);
 		bch_writeback_queue(dc);
@@ -1014,6 +1057,14 @@
 	struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
 	struct bcache_device *d = &dc->disk;
 
+	mutex_lock(&bch_register_lock);
+	d->flush_done = 1;
+
+	if (d->c)
+		bcache_device_unlink(d);
+
+	mutex_unlock(&bch_register_lock);
+
 	bch_cache_accounting_destroy(&dc->accounting);
 	kobject_del(&d->kobj);
 
@@ -1045,7 +1096,8 @@
 		hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
 	}
 
-	ret = bcache_device_init(&dc->disk, block_size);
+	ret = bcache_device_init(&dc->disk, block_size,
+			 dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
 	if (ret)
 		return ret;
 
@@ -1144,11 +1196,10 @@
 
 	kobject_init(&d->kobj, &bch_flash_dev_ktype);
 
-	if (bcache_device_init(d, block_bytes(c)))
+	if (bcache_device_init(d, block_bytes(c), u->sectors))
 		goto err;
 
 	bcache_device_attach(d, c, u - c->uuids);
-	set_capacity(d->disk, u->sectors);
 	bch_flash_dev_request_init(d);
 	add_disk(d->disk);
 
@@ -1255,9 +1306,10 @@
 	free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
 	free_pages((unsigned long) c->sort, ilog2(bucket_pages(c)));
 
-	kfree(c->fill_iter);
 	if (c->bio_split)
 		bioset_free(c->bio_split);
+	if (c->fill_iter)
+		mempool_destroy(c->fill_iter);
 	if (c->bio_meta)
 		mempool_destroy(c->bio_meta);
 	if (c->search)
@@ -1278,11 +1330,9 @@
 static void cache_set_flush(struct closure *cl)
 {
 	struct cache_set *c = container_of(cl, struct cache_set, caching);
+	struct cache *ca;
 	struct btree *b;
-
-	/* Shut down allocator threads */
-	set_bit(CACHE_SET_STOPPING_2, &c->flags);
-	wake_up(&c->alloc_wait);
+	unsigned i;
 
 	bch_cache_accounting_destroy(&c->accounting);
 
@@ -1295,7 +1345,11 @@
 	/* Should skip this if we're unregistering because of an error */
 	list_for_each_entry(b, &c->btree_cache, list)
 		if (btree_node_dirty(b))
-			bch_btree_write(b, true, NULL);
+			bch_btree_node_write(b, NULL);
+
+	for_each_cache(ca, c, i)
+		if (ca->alloc_thread)
+			kthread_stop(ca->alloc_thread);
 
 	closure_return(cl);
 }
@@ -1303,18 +1357,22 @@
 static void __cache_set_unregister(struct closure *cl)
 {
 	struct cache_set *c = container_of(cl, struct cache_set, caching);
-	struct cached_dev *dc, *t;
+	struct cached_dev *dc;
 	size_t i;
 
 	mutex_lock(&bch_register_lock);
 
-	if (test_bit(CACHE_SET_UNREGISTERING, &c->flags))
-		list_for_each_entry_safe(dc, t, &c->cached_devs, list)
-			bch_cached_dev_detach(dc);
-
 	for (i = 0; i < c->nr_uuids; i++)
-		if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i]))
-			bcache_device_stop(c->devices[i]);
+		if (c->devices[i]) {
+			if (!UUID_FLASH_ONLY(&c->uuids[i]) &&
+			    test_bit(CACHE_SET_UNREGISTERING, &c->flags)) {
+				dc = container_of(c->devices[i],
+						  struct cached_dev, disk);
+				bch_cached_dev_detach(dc);
+			} else {
+				bcache_device_stop(c->devices[i]);
+			}
+		}
 
 	mutex_unlock(&bch_register_lock);
 
@@ -1373,9 +1431,9 @@
 		c->btree_pages = max_t(int, c->btree_pages / 4,
 				       BTREE_MAX_PAGES);
 
-	init_waitqueue_head(&c->alloc_wait);
+	c->sort_crit_factor = int_sqrt(c->btree_pages);
+
 	mutex_init(&c->bucket_lock);
-	mutex_init(&c->fill_lock);
 	mutex_init(&c->sort_lock);
 	spin_lock_init(&c->sort_time_lock);
 	closure_init_unlocked(&c->sb_write);
@@ -1401,8 +1459,8 @@
 	    !(c->bio_meta = mempool_create_kmalloc_pool(2,
 				sizeof(struct bbio) + sizeof(struct bio_vec) *
 				bucket_pages(c))) ||
+	    !(c->fill_iter = mempool_create_kmalloc_pool(1, iter_size)) ||
 	    !(c->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
-	    !(c->fill_iter = kmalloc(iter_size, GFP_KERNEL)) ||
 	    !(c->sort = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    !(c->uuids = alloc_bucket_pages(GFP_KERNEL, c)) ||
 	    bch_journal_alloc(c) ||
@@ -1410,8 +1468,6 @@
 	    bch_open_buckets_alloc(c))
 		goto err;
 
-	c->fill_iter->size = sb->bucket_size / sb->block_size;
-
 	c->congested_read_threshold_us	= 2000;
 	c->congested_write_threshold_us	= 20000;
 	c->error_limit	= 8 << IO_ERROR_SHIFT;
@@ -1496,9 +1552,10 @@
 		 */
 		bch_journal_next(&c->journal);
 
+		err = "error starting allocator thread";
 		for_each_cache(ca, c, i)
-			closure_call(&ca->alloc, bch_allocator_thread,
-				     system_wq, &c->cl);
+			if (bch_cache_allocator_start(ca))
+				goto err;
 
 		/*
 		 * First place it's safe to allocate: btree_check() and
@@ -1531,17 +1588,16 @@
 
 		bch_btree_gc_finish(c);
 
+		err = "error starting allocator thread";
 		for_each_cache(ca, c, i)
-			closure_call(&ca->alloc, bch_allocator_thread,
-				     ca->alloc_workqueue, &c->cl);
+			if (bch_cache_allocator_start(ca))
+				goto err;
 
 		mutex_lock(&c->bucket_lock);
 		for_each_cache(ca, c, i)
 			bch_prio_write(ca);
 		mutex_unlock(&c->bucket_lock);
 
-		wake_up(&c->alloc_wait);
-
 		err = "cannot allocate new UUID bucket";
 		if (__uuid_write(c))
 			goto err_unlock_gc;
@@ -1552,7 +1608,7 @@
 			goto err_unlock_gc;
 
 		bkey_copy_key(&c->root->key, &MAX_KEY);
-		bch_btree_write(c->root, true, &op);
+		bch_btree_node_write(c->root, &op.cl);
 
 		bch_btree_set_root(c->root);
 		rw_unlock(true, c->root);
@@ -1673,9 +1729,6 @@
 
 	bio_split_pool_free(&ca->bio_split_hook);
 
-	if (ca->alloc_workqueue)
-		destroy_workqueue(ca->alloc_workqueue);
-
 	free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca)));
 	kfree(ca->prio_buckets);
 	vfree(ca->buckets);
@@ -1723,7 +1776,6 @@
 	    !(ca->prio_buckets	= kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
 					  2, GFP_KERNEL)) ||
 	    !(ca->disk_buckets	= alloc_bucket_pages(GFP_KERNEL, ca)) ||
-	    !(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) ||
 	    bio_split_pool_init(&ca->bio_split_hook))
 		return -ENOMEM;
 
@@ -1786,6 +1838,36 @@
 kobj_attribute_write(register,		register_bcache);
 kobj_attribute_write(register_quiet,	register_bcache);
 
+static bool bch_is_open_backing(struct block_device *bdev) {
+	struct cache_set *c, *tc;
+	struct cached_dev *dc, *t;
+
+	list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
+		list_for_each_entry_safe(dc, t, &c->cached_devs, list)
+			if (dc->bdev == bdev)
+				return true;
+	list_for_each_entry_safe(dc, t, &uncached_devices, list)
+		if (dc->bdev == bdev)
+			return true;
+	return false;
+}
+
+static bool bch_is_open_cache(struct block_device *bdev) {
+	struct cache_set *c, *tc;
+	struct cache *ca;
+	unsigned i;
+
+	list_for_each_entry_safe(c, tc, &bch_cache_sets, list)
+		for_each_cache(ca, c, i)
+			if (ca->bdev == bdev)
+				return true;
+	return false;
+}
+
+static bool bch_is_open(struct block_device *bdev) {
+	return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
+}
+
 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
 			       const char *buffer, size_t size)
 {
@@ -1810,8 +1892,13 @@
 				  FMODE_READ|FMODE_WRITE|FMODE_EXCL,
 				  sb);
 	if (IS_ERR(bdev)) {
-		if (bdev == ERR_PTR(-EBUSY))
-			err = "device busy";
+		if (bdev == ERR_PTR(-EBUSY)) {
+			bdev = lookup_bdev(strim(path));
+			if (!IS_ERR(bdev) && bch_is_open(bdev))
+				err = "device already registered";
+			else
+				err = "device busy";
+		}
 		goto err;
 	}
 
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4d9cca4..12a2c28 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -9,7 +9,9 @@
 #include "sysfs.h"
 #include "btree.h"
 #include "request.h"
+#include "writeback.h"
 
+#include <linux/blkdev.h>
 #include <linux/sort.h>
 
 static const char * const cache_replacement_policies[] = {
@@ -79,6 +81,9 @@
 rw_attribute(writeback_rate_d_smooth);
 read_attribute(writeback_rate_debug);
 
+read_attribute(stripe_size);
+read_attribute(partial_stripes_expensive);
+
 rw_attribute(synchronous);
 rw_attribute(journal_delay_ms);
 rw_attribute(discard);
@@ -127,7 +132,7 @@
 		char derivative[20];
 		char target[20];
 		bch_hprint(dirty,
-		       atomic_long_read(&dc->disk.sectors_dirty) << 9);
+			   bcache_dev_sectors_dirty(&dc->disk) << 9);
 		bch_hprint(derivative,	dc->writeback_rate_derivative << 9);
 		bch_hprint(target,	dc->writeback_rate_target << 9);
 
@@ -143,7 +148,10 @@
 	}
 
 	sysfs_hprint(dirty_data,
-		     atomic_long_read(&dc->disk.sectors_dirty) << 9);
+		     bcache_dev_sectors_dirty(&dc->disk) << 9);
+
+	sysfs_hprint(stripe_size,	(1 << dc->disk.stripe_size_bits) << 9);
+	var_printf(partial_stripes_expensive,	"%u");
 
 	var_printf(sequential_merge,	"%i");
 	var_hprint(sequential_cutoff);
@@ -170,6 +178,7 @@
 					     disk.kobj);
 	unsigned v = size;
 	struct cache_set *c;
+	struct kobj_uevent_env *env;
 
 #define d_strtoul(var)		sysfs_strtoul(var, dc->var)
 #define d_strtoi_h(var)		sysfs_hatoi(var, dc->var)
@@ -214,6 +223,7 @@
 	}
 
 	if (attr == &sysfs_label) {
+		/* note: endlines are preserved */
 		memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
 		bch_write_bdev_super(dc, NULL);
 		if (dc->disk.c) {
@@ -221,6 +231,15 @@
 			       buf, SB_LABEL_SIZE);
 			bch_uuid_write(dc->disk.c);
 		}
+		env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL);
+		if (!env)
+			return -ENOMEM;
+		add_uevent_var(env, "DRIVER=bcache");
+		add_uevent_var(env, "CACHED_UUID=%pU", dc->sb.uuid),
+		add_uevent_var(env, "CACHED_LABEL=%s", buf);
+		kobject_uevent_env(
+			&disk_to_dev(dc->disk.disk)->kobj, KOBJ_CHANGE, env->envp);
+		kfree(env);
 	}
 
 	if (attr == &sysfs_attach) {
@@ -284,6 +303,8 @@
 	&sysfs_writeback_rate_d_smooth,
 	&sysfs_writeback_rate_debug,
 	&sysfs_dirty_data,
+	&sysfs_stripe_size,
+	&sysfs_partial_stripes_expensive,
 	&sysfs_sequential_cutoff,
 	&sysfs_sequential_merge,
 	&sysfs_clear_stats,
@@ -665,12 +686,10 @@
 		int cmp(const void *l, const void *r)
 		{	return *((uint16_t *) r) - *((uint16_t *) l); }
 
-		/* Number of quantiles we compute */
-		const unsigned nq = 31;
-
 		size_t n = ca->sb.nbuckets, i, unused, btree;
 		uint64_t sum = 0;
-		uint16_t q[nq], *p, *cached;
+		/* Compute 31 quantiles */
+		uint16_t q[31], *p, *cached;
 		ssize_t ret;
 
 		cached = p = vmalloc(ca->sb.nbuckets * sizeof(uint16_t));
@@ -703,26 +722,29 @@
 		if (n)
 			do_div(sum, n);
 
-		for (i = 0; i < nq; i++)
-			q[i] = INITIAL_PRIO - cached[n * (i + 1) / (nq + 1)];
+		for (i = 0; i < ARRAY_SIZE(q); i++)
+			q[i] = INITIAL_PRIO - cached[n * (i + 1) /
+				(ARRAY_SIZE(q) + 1)];
 
 		vfree(p);
 
-		ret = snprintf(buf, PAGE_SIZE,
-			       "Unused:		%zu%%\n"
-			       "Metadata:	%zu%%\n"
-			       "Average:	%llu\n"
-			       "Sectors per Q:	%zu\n"
-			       "Quantiles:	[",
-			       unused * 100 / (size_t) ca->sb.nbuckets,
-			       btree * 100 / (size_t) ca->sb.nbuckets, sum,
-			       n * ca->sb.bucket_size / (nq + 1));
+		ret = scnprintf(buf, PAGE_SIZE,
+				"Unused:		%zu%%\n"
+				"Metadata:	%zu%%\n"
+				"Average:	%llu\n"
+				"Sectors per Q:	%zu\n"
+				"Quantiles:	[",
+				unused * 100 / (size_t) ca->sb.nbuckets,
+				btree * 100 / (size_t) ca->sb.nbuckets, sum,
+				n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1));
 
-		for (i = 0; i < nq && ret < (ssize_t) PAGE_SIZE; i++)
-			ret += snprintf(buf + ret, PAGE_SIZE - ret,
-					i < nq - 1 ? "%u " : "%u]\n", q[i]);
+		for (i = 0; i < ARRAY_SIZE(q); i++)
+			ret += scnprintf(buf + ret, PAGE_SIZE - ret,
+					 "%u ", q[i]);
+		ret--;
 
-		buf[PAGE_SIZE - 1] = '\0';
+		ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n");
+
 		return ret;
 	}
 
diff --git a/drivers/md/bcache/trace.c b/drivers/md/bcache/trace.c
index 983f9bb..f7b6c19 100644
--- a/drivers/md/bcache/trace.c
+++ b/drivers/md/bcache/trace.c
@@ -2,6 +2,7 @@
 #include "btree.h"
 #include "request.h"
 
+#include <linux/blktrace_api.h>
 #include <linux/module.h>
 
 #define CREATE_TRACE_POINTS
@@ -9,18 +10,44 @@
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_request_end);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_passthrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_hit);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_miss);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_sequential);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_bypass_congested);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_retry);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writethrough);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_skip);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_replay_key);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_full);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_entry_full);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_cache_cannibalize);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_read);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_write_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_read_dirty);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_journal_write);
-EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_cache_insert);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_alloc_fail);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_free);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_gc_coalesce);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_start);
 EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_gc_copy_collision);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_insert_key);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_split);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_node_compact);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_btree_set_root);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_invalidate);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_alloc_fail);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback);
+EXPORT_TRACEPOINT_SYMBOL_GPL(bcache_writeback_collision);
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index da3a99e..98eb811 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -228,23 +228,6 @@
 	}
 }
 
-int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp)
-{
-	int i;
-	struct bio_vec *bv;
-
-	bio_for_each_segment(bv, bio, i) {
-		bv->bv_page = alloc_page(gfp);
-		if (!bv->bv_page) {
-			while (bv-- != bio->bi_io_vec + bio->bi_idx)
-				__free_page(bv->bv_page);
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
 /*
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any
  * use permitted, subject to terms of PostgreSQL license; see.)
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 577393e..1ae2a73 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -15,8 +15,6 @@
 
 struct closure;
 
-#include <trace/events/bcache.h>
-
 #ifdef CONFIG_BCACHE_EDEBUG
 
 #define atomic_dec_bug(v)	BUG_ON(atomic_dec_return(v) < 0)
@@ -566,12 +564,8 @@
 	return x;
 }
 
-#define bio_end(bio)	((bio)->bi_sector + bio_sectors(bio))
-
 void bch_bio_map(struct bio *bio, void *base);
 
-int bch_bio_alloc_pages(struct bio *bio, gfp_t gfp);
-
 static inline sector_t bdev_sectors(struct block_device *bdev)
 {
 	return bdev->bd_inode->i_size >> 9;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 2714ed3..22cbff5 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -9,6 +9,9 @@
 #include "bcache.h"
 #include "btree.h"
 #include "debug.h"
+#include "writeback.h"
+
+#include <trace/events/bcache.h>
 
 static struct workqueue_struct *dirty_wq;
 
@@ -36,7 +39,7 @@
 
 	int change = 0;
 	int64_t error;
-	int64_t dirty = atomic_long_read(&dc->disk.sectors_dirty);
+	int64_t dirty = bcache_dev_sectors_dirty(&dc->disk);
 	int64_t derivative = dirty - dc->disk.sectors_dirty_last;
 
 	dc->disk.sectors_dirty_last = dirty;
@@ -105,6 +108,31 @@
 	return KEY_DIRTY(k);
 }
 
+static bool dirty_full_stripe_pred(struct keybuf *buf, struct bkey *k)
+{
+	uint64_t stripe;
+	unsigned nr_sectors = KEY_SIZE(k);
+	struct cached_dev *dc = container_of(buf, struct cached_dev,
+					     writeback_keys);
+	unsigned stripe_size = 1 << dc->disk.stripe_size_bits;
+
+	if (!KEY_DIRTY(k))
+		return false;
+
+	stripe = KEY_START(k) >> dc->disk.stripe_size_bits;
+	while (1) {
+		if (atomic_read(dc->disk.stripe_sectors_dirty + stripe) !=
+		    stripe_size)
+			return false;
+
+		if (nr_sectors <= stripe_size)
+			return true;
+
+		nr_sectors -= stripe_size;
+		stripe++;
+	}
+}
+
 static void dirty_init(struct keybuf_key *w)
 {
 	struct dirty_io *io = w->private;
@@ -149,7 +177,22 @@
 		searched_from_start = true;
 	}
 
-	bch_refill_keybuf(dc->disk.c, buf, &end);
+	if (dc->partial_stripes_expensive) {
+		uint64_t i;
+
+		for (i = 0; i < dc->disk.nr_stripes; i++)
+			if (atomic_read(dc->disk.stripe_sectors_dirty + i) ==
+			    1 << dc->disk.stripe_size_bits)
+				goto full_stripes;
+
+		goto normal_refill;
+full_stripes:
+		bch_refill_keybuf(dc->disk.c, buf, &end,
+				  dirty_full_stripe_pred);
+	} else {
+normal_refill:
+		bch_refill_keybuf(dc->disk.c, buf, &end, dirty_pred);
+	}
 
 	if (bkey_cmp(&buf->last_scanned, &end) >= 0 && searched_from_start) {
 		/* Searched the entire btree  - delay awhile */
@@ -181,10 +224,8 @@
 	}
 }
 
-void bch_writeback_add(struct cached_dev *dc, unsigned sectors)
+void bch_writeback_add(struct cached_dev *dc)
 {
-	atomic_long_add(sectors, &dc->disk.sectors_dirty);
-
 	if (!atomic_read(&dc->has_dirty) &&
 	    !atomic_xchg(&dc->has_dirty, 1)) {
 		atomic_inc(&dc->count);
@@ -203,6 +244,34 @@
 	}
 }
 
+void bcache_dev_sectors_dirty_add(struct cache_set *c, unsigned inode,
+				  uint64_t offset, int nr_sectors)
+{
+	struct bcache_device *d = c->devices[inode];
+	unsigned stripe_size, stripe_offset;
+	uint64_t stripe;
+
+	if (!d)
+		return;
+
+	stripe_size = 1 << d->stripe_size_bits;
+	stripe = offset >> d->stripe_size_bits;
+	stripe_offset = offset & (stripe_size - 1);
+
+	while (nr_sectors) {
+		int s = min_t(unsigned, abs(nr_sectors),
+			      stripe_size - stripe_offset);
+
+		if (nr_sectors < 0)
+			s = -s;
+
+		atomic_add(s, d->stripe_sectors_dirty + stripe);
+		nr_sectors -= s;
+		stripe_offset = 0;
+		stripe++;
+	}
+}
+
 /* Background writeback - IO loop */
 
 static void dirty_io_destructor(struct closure *cl)
@@ -216,9 +285,10 @@
 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 	struct keybuf_key *w = io->bio.bi_private;
 	struct cached_dev *dc = io->dc;
-	struct bio_vec *bv = bio_iovec_idx(&io->bio, io->bio.bi_vcnt);
+	struct bio_vec *bv;
+	int i;
 
-	while (bv-- != io->bio.bi_io_vec)
+	bio_for_each_segment_all(bv, &io->bio, i)
 		__free_page(bv->bv_page);
 
 	/* This is kind of a dumb way of signalling errors. */
@@ -236,10 +306,12 @@
 		for (i = 0; i < KEY_PTRS(&w->key); i++)
 			atomic_inc(&PTR_BUCKET(dc->disk.c, &w->key, i)->pin);
 
-		pr_debug("clearing %s", pkey(&w->key));
 		bch_btree_insert(&op, dc->disk.c);
 		closure_sync(&op.cl);
 
+		if (op.insert_collision)
+			trace_bcache_writeback_collision(&w->key);
+
 		atomic_long_inc(op.insert_collision
 				? &dc->disk.c->writeback_keys_failed
 				: &dc->disk.c->writeback_keys_done);
@@ -275,7 +347,6 @@
 	io->bio.bi_bdev		= io->dc->bdev;
 	io->bio.bi_end_io	= dirty_endio;
 
-	trace_bcache_write_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
 	continue_at(cl, write_dirty_finish, dirty_wq);
@@ -296,7 +367,6 @@
 {
 	struct dirty_io *io = container_of(cl, struct dirty_io, cl);
 
-	trace_bcache_read_dirty(&io->bio);
 	closure_bio_submit(&io->bio, cl, &io->dc->disk);
 
 	continue_at(cl, write_dirty, dirty_wq);
@@ -349,10 +419,10 @@
 		io->bio.bi_rw		= READ;
 		io->bio.bi_end_io	= read_dirty_endio;
 
-		if (bch_bio_alloc_pages(&io->bio, GFP_KERNEL))
+		if (bio_alloc_pages(&io->bio, GFP_KERNEL))
 			goto err_free;
 
-		pr_debug("%s", pkey(&w->key));
+		trace_bcache_writeback(&w->key);
 
 		closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl);
 
@@ -375,12 +445,49 @@
 	refill_dirty(cl);
 }
 
+/* Init */
+
+static int bch_btree_sectors_dirty_init(struct btree *b, struct btree_op *op,
+					struct cached_dev *dc)
+{
+	struct bkey *k;
+	struct btree_iter iter;
+
+	bch_btree_iter_init(b, &iter, &KEY(dc->disk.id, 0, 0));
+	while ((k = bch_btree_iter_next_filter(&iter, b, bch_ptr_bad)))
+		if (!b->level) {
+			if (KEY_INODE(k) > dc->disk.id)
+				break;
+
+			if (KEY_DIRTY(k))
+				bcache_dev_sectors_dirty_add(b->c, dc->disk.id,
+							     KEY_START(k),
+							     KEY_SIZE(k));
+		} else {
+			btree(sectors_dirty_init, k, b, op, dc);
+			if (KEY_INODE(k) > dc->disk.id)
+				break;
+
+			cond_resched();
+		}
+
+	return 0;
+}
+
+void bch_sectors_dirty_init(struct cached_dev *dc)
+{
+	struct btree_op op;
+
+	bch_btree_op_init_stack(&op);
+	btree_root(sectors_dirty_init, dc->disk.c, &op, dc);
+}
+
 void bch_cached_dev_writeback_init(struct cached_dev *dc)
 {
 	closure_init_unlocked(&dc->writeback);
 	init_rwsem(&dc->writeback_lock);
 
-	bch_keybuf_init(&dc->writeback_keys, dirty_pred);
+	bch_keybuf_init(&dc->writeback_keys);
 
 	dc->writeback_metadata		= true;
 	dc->writeback_running		= true;
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
new file mode 100644
index 0000000..c91f61b
--- /dev/null
+++ b/drivers/md/bcache/writeback.h
@@ -0,0 +1,64 @@
+#ifndef _BCACHE_WRITEBACK_H
+#define _BCACHE_WRITEBACK_H
+
+#define CUTOFF_WRITEBACK	40
+#define CUTOFF_WRITEBACK_SYNC	70
+
+static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
+{
+	uint64_t i, ret = 0;
+
+	for (i = 0; i < d->nr_stripes; i++)
+		ret += atomic_read(d->stripe_sectors_dirty + i);
+
+	return ret;
+}
+
+static inline bool bcache_dev_stripe_dirty(struct bcache_device *d,
+					   uint64_t offset,
+					   unsigned nr_sectors)
+{
+	uint64_t stripe = offset >> d->stripe_size_bits;
+
+	while (1) {
+		if (atomic_read(d->stripe_sectors_dirty + stripe))
+			return true;
+
+		if (nr_sectors <= 1 << d->stripe_size_bits)
+			return false;
+
+		nr_sectors -= 1 << d->stripe_size_bits;
+		stripe++;
+	}
+}
+
+static inline bool should_writeback(struct cached_dev *dc, struct bio *bio,
+				    unsigned cache_mode, bool would_skip)
+{
+	unsigned in_use = dc->disk.c->gc_stats.in_use;
+
+	if (cache_mode != CACHE_MODE_WRITEBACK ||
+	    atomic_read(&dc->disk.detaching) ||
+	    in_use > CUTOFF_WRITEBACK_SYNC)
+		return false;
+
+	if (dc->partial_stripes_expensive &&
+	    bcache_dev_stripe_dirty(&dc->disk, bio->bi_sector,
+				    bio_sectors(bio)))
+		return true;
+
+	if (would_skip)
+		return false;
+
+	return bio->bi_rw & REQ_SYNC ||
+		in_use <= CUTOFF_WRITEBACK;
+}
+
+void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
+void bch_writeback_queue(struct cached_dev *);
+void bch_writeback_add(struct cached_dev *);
+
+void bch_sectors_dirty_init(struct cached_dev *dc);
+void bch_cached_dev_writeback_init(struct cached_dev *);
+
+#endif
diff --git a/drivers/md/dm-cache-policy-mq.c b/drivers/md/dm-cache-policy-mq.c
index dc112a7..4296155 100644
--- a/drivers/md/dm-cache-policy-mq.c
+++ b/drivers/md/dm-cache-policy-mq.c
@@ -959,23 +959,21 @@
 	return r;
 }
 
-static void remove_mapping(struct mq_policy *mq, dm_oblock_t oblock)
+static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
 {
-	struct entry *e = hash_lookup(mq, oblock);
+	struct mq_policy *mq = to_mq_policy(p);
+	struct entry *e;
+
+	mutex_lock(&mq->lock);
+
+	e = hash_lookup(mq, oblock);
 
 	BUG_ON(!e || !e->in_cache);
 
 	del(mq, e);
 	e->in_cache = false;
 	push(mq, e);
-}
 
-static void mq_remove_mapping(struct dm_cache_policy *p, dm_oblock_t oblock)
-{
-	struct mq_policy *mq = to_mq_policy(p);
-
-	mutex_lock(&mq->lock);
-	remove_mapping(mq, oblock);
 	mutex_unlock(&mq->lock);
 }
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 957a719..df7b0a0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2290,12 +2290,18 @@
 	d = r10_bio->devs[1].devnum;
 	wbio = r10_bio->devs[1].bio;
 	wbio2 = r10_bio->devs[1].repl_bio;
+	/* Need to test wbio2->bi_end_io before we call
+	 * generic_make_request as if the former is NULL,
+	 * the latter is free to free wbio2.
+	 */
+	if (wbio2 && !wbio2->bi_end_io)
+		wbio2 = NULL;
 	if (wbio->bi_end_io) {
 		atomic_inc(&conf->mirrors[d].rdev->nr_pending);
 		md_sync_acct(conf->mirrors[d].rdev->bdev, bio_sectors(wbio));
 		generic_make_request(wbio);
 	}
-	if (wbio2 && wbio2->bi_end_io) {
+	if (wbio2) {
 		atomic_inc(&conf->mirrors[d].replacement->nr_pending);
 		md_sync_acct(conf->mirrors[d].replacement->bdev,
 			     bio_sectors(wbio2));
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2bf094a..78ea443 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3462,6 +3462,7 @@
 		    test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
 			set_bit(STRIPE_SYNCING, &sh->state);
 			clear_bit(STRIPE_INSYNC, &sh->state);
+			clear_bit(STRIPE_REPLACED, &sh->state);
 		}
 		spin_unlock(&sh->stripe_lock);
 	}
@@ -3607,19 +3608,23 @@
 			handle_parity_checks5(conf, sh, &s, disks);
 	}
 
-	if (s.replacing && s.locked == 0
-	    && !test_bit(STRIPE_INSYNC, &sh->state)) {
+	if ((s.replacing || s.syncing) && s.locked == 0
+	    && !test_bit(STRIPE_COMPUTE_RUN, &sh->state)
+	    && !test_bit(STRIPE_REPLACED, &sh->state)) {
 		/* Write out to replacement devices where possible */
 		for (i = 0; i < conf->raid_disks; i++)
-			if (test_bit(R5_UPTODATE, &sh->dev[i].flags) &&
-			    test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
+			if (test_bit(R5_NeedReplace, &sh->dev[i].flags)) {
+				WARN_ON(!test_bit(R5_UPTODATE, &sh->dev[i].flags));
 				set_bit(R5_WantReplace, &sh->dev[i].flags);
 				set_bit(R5_LOCKED, &sh->dev[i].flags);
 				s.locked++;
 			}
-		set_bit(STRIPE_INSYNC, &sh->state);
+		if (s.replacing)
+			set_bit(STRIPE_INSYNC, &sh->state);
+		set_bit(STRIPE_REPLACED, &sh->state);
 	}
 	if ((s.syncing || s.replacing) && s.locked == 0 &&
+	    !test_bit(STRIPE_COMPUTE_RUN, &sh->state) &&
 	    test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index b0b663b..70c4932 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -306,6 +306,7 @@
 	STRIPE_SYNC_REQUESTED,
 	STRIPE_SYNCING,
 	STRIPE_INSYNC,
+	STRIPE_REPLACED,
 	STRIPE_PREREAD_ACTIVE,
 	STRIPE_DELAYED,
 	STRIPE_DEGRADED,
diff --git a/drivers/media/i2c/ml86v7667.c b/drivers/media/i2c/ml86v7667.c
index efdc873..a985702 100644
--- a/drivers/media/i2c/ml86v7667.c
+++ b/drivers/media/i2c/ml86v7667.c
@@ -117,7 +117,7 @@
 {
 	struct v4l2_subdev *sd = to_sd(ctrl);
 	struct i2c_client *client = v4l2_get_subdevdata(sd);
-	int ret;
+	int ret = -EINVAL;
 
 	switch (ctrl->id) {
 	case V4L2_CID_BRIGHTNESS:
@@ -157,7 +157,7 @@
 		break;
 	}
 
-	return 0;
+	return ret;
 }
 
 static int ml86v7667_querystd(struct v4l2_subdev *sd, v4l2_std_id *std)
diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index df4ada88..bd9405d 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -1987,7 +1987,7 @@
 
 #ifdef CONFIG_OF
 static const struct of_device_id coda_dt_ids[] = {
-	{ .compatible = "fsl,imx27-vpu", .data = &coda_platform_ids[CODA_IMX27] },
+	{ .compatible = "fsl,imx27-vpu", .data = &coda_devdata[CODA_IMX27] },
 	{ .compatible = "fsl,imx53-vpu", .data = &coda_devdata[CODA_IMX53] },
 	{ /* sentinel */ }
 };
diff --git a/drivers/media/platform/s5p-g2d/g2d.c b/drivers/media/platform/s5p-g2d/g2d.c
index 553d87e..fd6289d 100644
--- a/drivers/media/platform/s5p-g2d/g2d.c
+++ b/drivers/media/platform/s5p-g2d/g2d.c
@@ -784,6 +784,7 @@
 	}
 	*vfd = g2d_videodev;
 	vfd->lock = &dev->mutex;
+	vfd->v4l2_dev = &dev->v4l2_dev;
 	ret = video_register_device(vfd, VFL_TYPE_GRABBER, 0);
 	if (ret) {
 		v4l2_err(&dev->v4l2_dev, "Failed to register video device\n");
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c b/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
index 5296385..4f6dd42 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_dec.c
@@ -344,7 +344,7 @@
 		pix_mp->num_planes = 2;
 		/* Set pixelformat to the format in which MFC
 		   outputs the decoded frame */
-		pix_mp->pixelformat = V4L2_PIX_FMT_NV12MT;
+		pix_mp->pixelformat = ctx->dst_fmt->fourcc;
 		pix_mp->plane_fmt[0].bytesperline = ctx->buf_width;
 		pix_mp->plane_fmt[0].sizeimage = ctx->luma_size;
 		pix_mp->plane_fmt[1].bytesperline = ctx->buf_width;
@@ -382,10 +382,16 @@
 			mfc_err("Unsupported format for source.\n");
 			return -EINVAL;
 		}
-		if (!IS_MFCV6(dev) && (fmt->fourcc == V4L2_PIX_FMT_VP8)) {
-			mfc_err("Not supported format.\n");
+		if (fmt->codec_mode == S5P_FIMV_CODEC_NONE) {
+			mfc_err("Unknown codec\n");
 			return -EINVAL;
 		}
+		if (!IS_MFCV6(dev)) {
+			if (fmt->fourcc == V4L2_PIX_FMT_VP8) {
+				mfc_err("Not supported format.\n");
+				return -EINVAL;
+			}
+		}
 	} else if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
 		fmt = find_format(f, MFC_FMT_RAW);
 		if (!fmt) {
@@ -411,7 +417,6 @@
 	struct s5p_mfc_dev *dev = video_drvdata(file);
 	struct s5p_mfc_ctx *ctx = fh_to_ctx(priv);
 	int ret = 0;
-	struct s5p_mfc_fmt *fmt;
 	struct v4l2_pix_format_mplane *pix_mp;
 
 	mfc_debug_enter();
@@ -425,54 +430,32 @@
 		goto out;
 	}
 	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
-		fmt = find_format(f, MFC_FMT_RAW);
-		if (!fmt) {
-			mfc_err("Unsupported format for source.\n");
-			return -EINVAL;
-		}
-		if (!IS_MFCV6(dev) && (fmt->fourcc != V4L2_PIX_FMT_NV12MT)) {
-			mfc_err("Not supported format.\n");
-			return -EINVAL;
-		} else if (IS_MFCV6(dev) &&
-				(fmt->fourcc == V4L2_PIX_FMT_NV12MT)) {
-			mfc_err("Not supported format.\n");
-			return -EINVAL;
-		}
-		ctx->dst_fmt = fmt;
-		mfc_debug_leave();
-		return ret;
-	} else if (f->type != V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
-		mfc_err("Wrong type error for S_FMT : %d", f->type);
-		return -EINVAL;
-	}
-	fmt = find_format(f, MFC_FMT_DEC);
-	if (!fmt || fmt->codec_mode == S5P_MFC_CODEC_NONE) {
-		mfc_err("Unknown codec\n");
-		ret = -EINVAL;
+		/* dst_fmt is validated by call to vidioc_try_fmt */
+		ctx->dst_fmt = find_format(f, MFC_FMT_RAW);
+		ret = 0;
 		goto out;
-	}
-	if (fmt->type != MFC_FMT_DEC) {
-		mfc_err("Wrong format selected, you should choose "
-					"format for decoding\n");
-		ret = -EINVAL;
-		goto out;
-	}
-	if (!IS_MFCV6(dev) && (fmt->fourcc == V4L2_PIX_FMT_VP8)) {
-		mfc_err("Not supported format.\n");
-		return -EINVAL;
-	}
-	ctx->src_fmt = fmt;
-	ctx->codec_mode = fmt->codec_mode;
-	mfc_debug(2, "The codec number is: %d\n", ctx->codec_mode);
-	pix_mp->height = 0;
-	pix_mp->width = 0;
-	if (pix_mp->plane_fmt[0].sizeimage)
-		ctx->dec_src_buf_size = pix_mp->plane_fmt[0].sizeimage;
-	else
-		pix_mp->plane_fmt[0].sizeimage = ctx->dec_src_buf_size =
+	} else if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
+		/* src_fmt is validated by call to vidioc_try_fmt */
+		ctx->src_fmt = find_format(f, MFC_FMT_DEC);
+		ctx->codec_mode = ctx->src_fmt->codec_mode;
+		mfc_debug(2, "The codec number is: %d\n", ctx->codec_mode);
+		pix_mp->height = 0;
+		pix_mp->width = 0;
+		if (pix_mp->plane_fmt[0].sizeimage)
+			ctx->dec_src_buf_size = pix_mp->plane_fmt[0].sizeimage;
+		else
+			pix_mp->plane_fmt[0].sizeimage = ctx->dec_src_buf_size =
 								DEF_CPB_SIZE;
-	pix_mp->plane_fmt[0].bytesperline = 0;
-	ctx->state = MFCINST_INIT;
+		pix_mp->plane_fmt[0].bytesperline = 0;
+		ctx->state = MFCINST_INIT;
+		ret = 0;
+		goto out;
+	} else {
+		mfc_err("Wrong type error for S_FMT : %d", f->type);
+		ret = -EINVAL;
+		goto out;
+	}
+
 out:
 	mfc_debug_leave();
 	return ret;
diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
index 2549967..59e56f4 100644
--- a/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
+++ b/drivers/media/platform/s5p-mfc/s5p_mfc_enc.c
@@ -906,6 +906,7 @@
 
 static int vidioc_try_fmt(struct file *file, void *priv, struct v4l2_format *f)
 {
+	struct s5p_mfc_dev *dev = video_drvdata(file);
 	struct s5p_mfc_fmt *fmt;
 	struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp;
 
@@ -930,6 +931,18 @@
 			return -EINVAL;
 		}
 
+		if (!IS_MFCV6(dev)) {
+			if (fmt->fourcc == V4L2_PIX_FMT_NV12MT_16X16) {
+				mfc_err("Not supported format.\n");
+				return -EINVAL;
+			}
+		} else if (IS_MFCV6(dev)) {
+			if (fmt->fourcc == V4L2_PIX_FMT_NV12MT) {
+				mfc_err("Not supported format.\n");
+				return -EINVAL;
+			}
+		}
+
 		if (fmt->num_planes != pix_fmt_mp->num_planes) {
 			mfc_err("failed to try output format\n");
 			return -EINVAL;
@@ -947,7 +960,6 @@
 {
 	struct s5p_mfc_dev *dev = video_drvdata(file);
 	struct s5p_mfc_ctx *ctx = fh_to_ctx(priv);
-	struct s5p_mfc_fmt *fmt;
 	struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp;
 	int ret = 0;
 
@@ -960,13 +972,9 @@
 		goto out;
 	}
 	if (f->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
-		fmt = find_format(f, MFC_FMT_ENC);
-		if (!fmt) {
-			mfc_err("failed to set capture format\n");
-			return -EINVAL;
-		}
+		/* dst_fmt is validated by call to vidioc_try_fmt */
+		ctx->dst_fmt = find_format(f, MFC_FMT_ENC);
 		ctx->state = MFCINST_INIT;
-		ctx->dst_fmt = fmt;
 		ctx->codec_mode = ctx->dst_fmt->codec_mode;
 		ctx->enc_dst_buf_size =	pix_fmt_mp->plane_fmt[0].sizeimage;
 		pix_fmt_mp->plane_fmt[0].bytesperline = 0;
@@ -987,28 +995,8 @@
 		}
 		mfc_debug(2, "Got instance number: %d\n", ctx->inst_no);
 	} else if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
-		fmt = find_format(f, MFC_FMT_RAW);
-		if (!fmt) {
-			mfc_err("failed to set output format\n");
-			return -EINVAL;
-		}
-
-		if (!IS_MFCV6(dev) &&
-				(fmt->fourcc == V4L2_PIX_FMT_NV12MT_16X16)) {
-			mfc_err("Not supported format.\n");
-			return -EINVAL;
-		} else if (IS_MFCV6(dev) &&
-				(fmt->fourcc == V4L2_PIX_FMT_NV12MT)) {
-			mfc_err("Not supported format.\n");
-			return -EINVAL;
-		}
-
-		if (fmt->num_planes != pix_fmt_mp->num_planes) {
-			mfc_err("failed to set output format\n");
-			ret = -EINVAL;
-			goto out;
-		}
-		ctx->src_fmt = fmt;
+		/* src_fmt is validated by call to vidioc_try_fmt */
+		ctx->src_fmt = find_format(f, MFC_FMT_RAW);
 		ctx->img_width = pix_fmt_mp->width;
 		ctx->img_height = pix_fmt_mp->height;
 		mfc_debug(2, "codec number: %d\n", ctx->src_fmt->codec_mode);
diff --git a/drivers/media/usb/em28xx/em28xx-i2c.c b/drivers/media/usb/em28xx/em28xx-i2c.c
index 4851cc2..c4ff973 100644
--- a/drivers/media/usb/em28xx/em28xx-i2c.c
+++ b/drivers/media/usb/em28xx/em28xx-i2c.c
@@ -726,7 +726,7 @@
 
 	*eedata = data;
 	*eedata_len = len;
-	dev_config = (void *)eedata;
+	dev_config = (void *)*eedata;
 
 	switch (le16_to_cpu(dev_config->chip_conf) >> 4 & 0x3) {
 	case 0:
diff --git a/drivers/media/usb/hdpvr/hdpvr-core.c b/drivers/media/usb/hdpvr/hdpvr-core.c
index cb69405..6e50707 100644
--- a/drivers/media/usb/hdpvr/hdpvr-core.c
+++ b/drivers/media/usb/hdpvr/hdpvr-core.c
@@ -303,6 +303,11 @@
 
 	dev->workqueue = 0;
 
+	/* init video transfer queues first of all */
+	/* to prevent oops in hdpvr_delete() on error paths */
+	INIT_LIST_HEAD(&dev->free_buff_list);
+	INIT_LIST_HEAD(&dev->rec_buff_list);
+
 	/* register v4l2_device early so it can be used for printks */
 	if (v4l2_device_register(&interface->dev, &dev->v4l2_dev)) {
 		dev_err(&interface->dev, "v4l2_device_register failed\n");
@@ -325,10 +330,6 @@
 	if (!dev->workqueue)
 		goto error;
 
-	/* init video transfer queues */
-	INIT_LIST_HEAD(&dev->free_buff_list);
-	INIT_LIST_HEAD(&dev->rec_buff_list);
-
 	dev->options = hdpvr_default_options;
 
 	if (default_video_input < HDPVR_VIDEO_INPUTS)
@@ -405,7 +406,7 @@
 				    video_nr[atomic_inc_return(&dev_nr)]);
 	if (retval < 0) {
 		v4l2_err(&dev->v4l2_dev, "registering videodev failed\n");
-		goto error;
+		goto reg_fail;
 	}
 
 	/* let the user know what node this device is now attached to */
diff --git a/drivers/media/usb/usbtv/Kconfig b/drivers/media/usb/usbtv/Kconfig
index 8864436..7c5b860 100644
--- a/drivers/media/usb/usbtv/Kconfig
+++ b/drivers/media/usb/usbtv/Kconfig
@@ -1,6 +1,6 @@
 config VIDEO_USBTV
         tristate "USBTV007 video capture support"
-        depends on VIDEO_DEV
+        depends on VIDEO_V4L2
         select VIDEOBUF2_VMALLOC
 
         ---help---
diff --git a/drivers/media/usb/usbtv/usbtv.c b/drivers/media/usb/usbtv/usbtv.c
index bf43f87..91650173 100644
--- a/drivers/media/usb/usbtv/usbtv.c
+++ b/drivers/media/usb/usbtv/usbtv.c
@@ -57,7 +57,7 @@
 #define USBTV_CHUNK_SIZE	256
 #define USBTV_CHUNK		240
 #define USBTV_CHUNKS		(USBTV_WIDTH * USBTV_HEIGHT \
-					/ 2 / USBTV_CHUNK)
+					/ 4 / USBTV_CHUNK)
 
 /* Chunk header. */
 #define USBTV_MAGIC_OK(chunk)	((be32_to_cpu(chunk[0]) & 0xff000000) \
@@ -89,6 +89,7 @@
 	/* Number of currently processed frame, useful find
 	 * out when a new one begins. */
 	u32 frame_id;
+	int chunks_done;
 
 	int iso_size;
 	unsigned int sequence;
@@ -202,6 +203,26 @@
 	return 0;
 }
 
+/* Copy data from chunk into a frame buffer, deinterlacing the data
+ * into every second line. Unfortunately, they don't align nicely into
+ * 720 pixel lines, as the chunk is 240 words long, which is 480 pixels.
+ * Therefore, we break down the chunk into two halves before copyting,
+ * so that we can interleave a line if needed. */
+static void usbtv_chunk_to_vbuf(u32 *frame, u32 *src, int chunk_no, int odd)
+{
+	int half;
+
+	for (half = 0; half < 2; half++) {
+		int part_no = chunk_no * 2 + half;
+		int line = part_no / 3;
+		int part_index = (line * 2 + !odd) * 3 + (part_no % 3);
+
+		u32 *dst = &frame[part_index * USBTV_CHUNK/2];
+		memcpy(dst, src, USBTV_CHUNK/2 * sizeof(*src));
+		src += USBTV_CHUNK/2;
+	}
+}
+
 /* Called for each 256-byte image chunk.
  * First word identifies the chunk, followed by 240 words of image
  * data and padding. */
@@ -218,17 +239,17 @@
 	frame_id = USBTV_FRAME_ID(chunk);
 	odd = USBTV_ODD(chunk);
 	chunk_no = USBTV_CHUNK_NO(chunk);
-
-	/* Deinterlace. TODO: Use interlaced frame format. */
-	chunk_no = (chunk_no - chunk_no % 3) * 2 + chunk_no % 3;
-	chunk_no += !odd * 3;
-
 	if (chunk_no >= USBTV_CHUNKS)
 		return;
 
 	/* Beginning of a frame. */
-	if (chunk_no == 0)
+	if (chunk_no == 0) {
 		usbtv->frame_id = frame_id;
+		usbtv->chunks_done = 0;
+	}
+
+	if (usbtv->frame_id != frame_id)
+		return;
 
 	spin_lock_irqsave(&usbtv->buflock, flags);
 	if (list_empty(&usbtv->bufs)) {
@@ -241,19 +262,23 @@
 	buf = list_first_entry(&usbtv->bufs, struct usbtv_buf, list);
 	frame = vb2_plane_vaddr(&buf->vb, 0);
 
-	/* Copy the chunk. */
-	memcpy(&frame[chunk_no * USBTV_CHUNK], &chunk[1],
-			USBTV_CHUNK * sizeof(chunk[1]));
+	/* Copy the chunk data. */
+	usbtv_chunk_to_vbuf(frame, &chunk[1], chunk_no, odd);
+	usbtv->chunks_done++;
 
 	/* Last chunk in a frame, signalling an end */
-	if (usbtv->frame_id && chunk_no == USBTV_CHUNKS-1) {
+	if (odd && chunk_no == USBTV_CHUNKS-1) {
 		int size = vb2_plane_size(&buf->vb, 0);
+		enum vb2_buffer_state state = usbtv->chunks_done ==
+						USBTV_CHUNKS ?
+						VB2_BUF_STATE_DONE :
+						VB2_BUF_STATE_ERROR;
 
 		buf->vb.v4l2_buf.field = V4L2_FIELD_INTERLACED;
 		buf->vb.v4l2_buf.sequence = usbtv->sequence++;
 		v4l2_get_timestamp(&buf->vb.v4l2_buf.timestamp);
 		vb2_set_plane_payload(&buf->vb, 0, size);
-		vb2_buffer_done(&buf->vb, VB2_BUF_STATE_DONE);
+		vb2_buffer_done(&buf->vb, state);
 		list_del(&buf->list);
 	}
 
@@ -518,7 +543,7 @@
 	if (*nbuffers < 2)
 		*nbuffers = 2;
 	*nplanes = 1;
-	sizes[0] = USBTV_CHUNK * USBTV_CHUNKS * sizeof(u32);
+	sizes[0] = USBTV_WIDTH * USBTV_HEIGHT / 2 * sizeof(u32);
 
 	return 0;
 }
diff --git a/drivers/misc/atmel-ssc.c b/drivers/misc/atmel-ssc.c
index f7b90661..e068a76 100644
--- a/drivers/misc/atmel-ssc.c
+++ b/drivers/misc/atmel-ssc.c
@@ -66,14 +66,19 @@
 
 void ssc_free(struct ssc_device *ssc)
 {
+	bool disable_clk = true;
+
 	spin_lock(&user_lock);
-	if (ssc->user) {
+	if (ssc->user)
 		ssc->user--;
-		clk_disable_unprepare(ssc->clk);
-	} else {
+	else {
+		disable_clk = false;
 		dev_dbg(&ssc->pdev->dev, "device already free\n");
 	}
 	spin_unlock(&user_lock);
+
+	if (disable_clk)
+		clk_disable_unprepare(ssc->clk);
 }
 EXPORT_SYMBOL(ssc_free);
 
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index f9296ab..6127ab6 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -167,7 +167,7 @@
 
 	dev->hbm_state = MEI_HBM_IDLE;
 	if (mei_write_message(dev, mei_hdr, dev->wr_msg.data)) {
-		dev_err(&dev->pdev->dev, "version message writet failed\n");
+		dev_err(&dev->pdev->dev, "version message write failed\n");
 		dev->dev_state = MEI_DEV_RESETTING;
 		mei_reset(dev, 1);
 		return -ENODEV;
diff --git a/drivers/misc/mei/hw-me.c b/drivers/misc/mei/hw-me.c
index e4f8dec..b22c7e2 100644
--- a/drivers/misc/mei/hw-me.c
+++ b/drivers/misc/mei/hw-me.c
@@ -239,14 +239,18 @@
 	if (mei_me_hw_is_ready(dev))
 		return 0;
 
+	dev->recvd_hw_ready = false;
 	mutex_unlock(&dev->device_lock);
 	err = wait_event_interruptible_timeout(dev->wait_hw_ready,
-			dev->recvd_hw_ready, MEI_INTEROP_TIMEOUT);
+			dev->recvd_hw_ready,
+			mei_secs_to_jiffies(MEI_INTEROP_TIMEOUT));
 	mutex_lock(&dev->device_lock);
 	if (!err && !dev->recvd_hw_ready) {
+		if (!err)
+			err = -ETIMEDOUT;
 		dev_err(&dev->pdev->dev,
-			"wait hw ready failed. status = 0x%x\n", err);
-		return -ETIMEDOUT;
+			"wait hw ready failed. status = %d\n", err);
+		return err;
 	}
 
 	dev->recvd_hw_ready = false;
@@ -483,7 +487,9 @@
 	/* check if ME wants a reset */
 	if (!mei_hw_is_ready(dev) &&
 	    dev->dev_state != MEI_DEV_RESETTING &&
-	    dev->dev_state != MEI_DEV_INITIALIZING) {
+	    dev->dev_state != MEI_DEV_INITIALIZING &&
+	    dev->dev_state != MEI_DEV_POWER_DOWN &&
+	    dev->dev_state != MEI_DEV_POWER_UP) {
 		dev_dbg(&dev->pdev->dev, "FW not ready.\n");
 		mei_reset(dev, 1);
 		mutex_unlock(&dev->device_lock);
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index ed1d752..e6f16f8 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -148,7 +148,8 @@
 
 	dev->hbm_state = MEI_HBM_IDLE;
 
-	if (dev->dev_state != MEI_DEV_INITIALIZING) {
+	if (dev->dev_state != MEI_DEV_INITIALIZING &&
+	    dev->dev_state != MEI_DEV_POWER_UP) {
 		if (dev->dev_state != MEI_DEV_DISABLED &&
 		    dev->dev_state != MEI_DEV_POWER_DOWN)
 			dev->dev_state = MEI_DEV_RESETTING;
diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c
index 847b199..2c5a91b 100644
--- a/drivers/mmc/host/pxamci.c
+++ b/drivers/mmc/host/pxamci.c
@@ -128,7 +128,7 @@
 			       !!on ^ host->pdata->gpio_power_invert);
 	}
 	if (!host->vcc && host->pdata && host->pdata->setpower)
-		host->pdata->setpower(mmc_dev(host->mmc), vdd);
+		return host->pdata->setpower(mmc_dev(host->mmc), vdd);
 
 	return 0;
 }
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index a746ba2..a956053 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -1007,7 +1007,7 @@
 
 	soft = &pkt.soft.rfc1201;
 
-	lp->hw.copy_from_card(dev, bufnum, 0, &pkt, sizeof(ARC_HDR_SIZE));
+	lp->hw.copy_from_card(dev, bufnum, 0, &pkt, ARC_HDR_SIZE);
 	if (pkt.hard.offset[0]) {
 		ofs = pkt.hard.offset[0];
 		length = 256 - ofs;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 07f257d4..e48cb33 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3714,11 +3714,17 @@
  * The bonding ndo_neigh_setup is called at init time beofre any
  * slave exists. So we must declare proxy setup function which will
  * be used at run time to resolve the actual slave neigh param setup.
+ *
+ * It's also called by master devices (such as vlans) to setup their
+ * underlying devices. In that case - do nothing, we're already set up from
+ * our init.
  */
 static int bond_neigh_setup(struct net_device *dev,
 			    struct neigh_parms *parms)
 {
-	parms->neigh_setup   = bond_neigh_init;
+	/* modify only our neigh_parms */
+	if (parms->dev == dev)
+		parms->neigh_setup = bond_neigh_init;
 
 	return 0;
 }
diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c
index 6aa7b32..ac6177d 100644
--- a/drivers/net/can/usb/esd_usb2.c
+++ b/drivers/net/can/usb/esd_usb2.c
@@ -412,10 +412,20 @@
 
 		switch (msg->msg.hdr.cmd) {
 		case CMD_CAN_RX:
+			if (msg->msg.rx.net >= dev->net_count) {
+				dev_err(dev->udev->dev.parent, "format error\n");
+				break;
+			}
+
 			esd_usb2_rx_can_msg(dev->nets[msg->msg.rx.net], msg);
 			break;
 
 		case CMD_CAN_TX:
+			if (msg->msg.txdone.net >= dev->net_count) {
+				dev_err(dev->udev->dev.parent, "format error\n");
+				break;
+			}
+
 			esd_usb2_tx_done_msg(dev->nets[msg->msg.txdone.net],
 					     msg);
 			break;
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 25723d8..925ab8e 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -649,7 +649,7 @@
 		if ((mc->ptr + rec_len) > mc->end)
 			goto decode_failed;
 
-		memcpy(cf->data, mc->ptr, rec_len);
+		memcpy(cf->data, mc->ptr, cf->can_dlc);
 		mc->ptr += rec_len;
 	}
 
diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index cbd388e..8becd3d 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -779,6 +779,7 @@
 			usb_unanchor_urb(urb);
 			usb_free_coherent(priv->udev, RX_BUFFER_SIZE, buf,
 					  urb->transfer_dma);
+			usb_free_urb(urb);
 			break;
 		}
 
diff --git a/drivers/net/ethernet/allwinner/Kconfig b/drivers/net/ethernet/allwinner/Kconfig
index 53ad213..d8d95d4 100644
--- a/drivers/net/ethernet/allwinner/Kconfig
+++ b/drivers/net/ethernet/allwinner/Kconfig
@@ -3,19 +3,20 @@
 #
 
 config NET_VENDOR_ALLWINNER
-       bool "Allwinner devices"
-       default y
-       depends on ARCH_SUNXI
-       ---help---
-         If you have a network (Ethernet) card belonging to this
-	 class, say Y and read the Ethernet-HOWTO, available from
-	 <http://www.tldp.org/docs.html#howto>.
+	bool "Allwinner devices"
+	default y
 
-	 Note that the answer to this question doesn't directly
-	 affect the kernel: saying N will just cause the configurator
-	 to skip all the questions about Allwinner cards. If you say Y,
-	 you will be asked for your specific card in the following
-	 questions.
+	depends on ARCH_SUNXI
+	---help---
+	  If you have a network (Ethernet) card belonging to this
+	  class, say Y and read the Ethernet-HOWTO, available from
+	  <http://www.tldp.org/docs.html#howto>.
+
+	  Note that the answer to this question doesn't directly
+	  affect the kernel: saying N will just cause the configurator
+	  to skip all the questions about Allwinner cards. If you say Y,
+	  you will be asked for your specific card in the following
+	  questions.
 
 if NET_VENDOR_ALLWINNER
 
@@ -26,6 +27,7 @@
 	select CRC32
 	select MII
 	select PHYLIB
+	select MDIO_SUN4I
         ---help---
           Support for Allwinner A10 EMAC ethernet driver.
 
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index f1b121e..55d79cb 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -199,7 +199,7 @@
 	struct arc_emac_priv *priv = netdev_priv(ndev);
 	unsigned int work_done;
 
-	for (work_done = 0; work_done <= budget; work_done++) {
+	for (work_done = 0; work_done < budget; work_done++) {
 		unsigned int *last_rx_bd = &priv->last_rx_bd;
 		struct net_device_stats *stats = &priv->stats;
 		struct buffer_state *rx_buff = &priv->rx_buff[*last_rx_bd];
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c.h b/drivers/net/ethernet/atheros/atl1c/atl1c.h
index b2bf324..0f05565 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c.h
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c.h
@@ -520,6 +520,9 @@
 	struct net_device   *netdev;
 	struct pci_dev      *pdev;
 	struct napi_struct  napi;
+	struct page         *rx_page;
+	unsigned int	    rx_page_offset;
+	unsigned int	    rx_frag_size;
 	struct atl1c_hw        hw;
 	struct atl1c_hw_stats  hw_stats;
 	struct mii_if_info  mii;    /* MII interface info */
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 786a874..a36a760 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -481,10 +481,15 @@
 static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
 				struct net_device *dev)
 {
+	unsigned int head_size;
 	int mtu = dev->mtu;
 
 	adapter->rx_buffer_len = mtu > AT_RX_BUF_SIZE ?
 		roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE;
+
+	head_size = SKB_DATA_ALIGN(adapter->rx_buffer_len + NET_SKB_PAD) +
+		    SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	adapter->rx_frag_size = roundup_pow_of_two(head_size);
 }
 
 static netdev_features_t atl1c_fix_features(struct net_device *netdev,
@@ -952,6 +957,10 @@
 		kfree(adapter->tpd_ring[0].buffer_info);
 		adapter->tpd_ring[0].buffer_info = NULL;
 	}
+	if (adapter->rx_page) {
+		put_page(adapter->rx_page);
+		adapter->rx_page = NULL;
+	}
 }
 
 /**
@@ -1639,6 +1648,35 @@
 	skb_checksum_none_assert(skb);
 }
 
+static struct sk_buff *atl1c_alloc_skb(struct atl1c_adapter *adapter)
+{
+	struct sk_buff *skb;
+	struct page *page;
+
+	if (adapter->rx_frag_size > PAGE_SIZE)
+		return netdev_alloc_skb(adapter->netdev,
+					adapter->rx_buffer_len);
+
+	page = adapter->rx_page;
+	if (!page) {
+		adapter->rx_page = page = alloc_page(GFP_ATOMIC);
+		if (unlikely(!page))
+			return NULL;
+		adapter->rx_page_offset = 0;
+	}
+
+	skb = build_skb(page_address(page) + adapter->rx_page_offset,
+			adapter->rx_frag_size);
+	if (likely(skb)) {
+		adapter->rx_page_offset += adapter->rx_frag_size;
+		if (adapter->rx_page_offset >= PAGE_SIZE)
+			adapter->rx_page = NULL;
+		else
+			get_page(page);
+	}
+	return skb;
+}
+
 static int atl1c_alloc_rx_buffer(struct atl1c_adapter *adapter)
 {
 	struct atl1c_rfd_ring *rfd_ring = &adapter->rfd_ring;
@@ -1660,7 +1698,7 @@
 	while (next_info->flags & ATL1C_BUFFER_FREE) {
 		rfd_desc = ATL1C_RFD_DESC(rfd_ring, rfd_next_to_use);
 
-		skb = netdev_alloc_skb(adapter->netdev, adapter->rx_buffer_len);
+		skb = atl1c_alloc_skb(adapter);
 		if (unlikely(!skb)) {
 			if (netif_msg_rx_err(adapter))
 				dev_warn(&pdev->dev, "alloc rx buffer failed\n");
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index dedbd76..00b88cb 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -486,7 +486,7 @@
 
 	struct napi_struct	napi;
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int state;
 #define BNX2X_FP_STATE_IDLE		      0
 #define BNX2X_FP_STATE_NAPI		(1 << 0)    /* NAPI owns this FP */
@@ -498,7 +498,7 @@
 #define BNX2X_FP_USER_PEND (BNX2X_FP_STATE_POLL | BNX2X_FP_STATE_POLL_YIELD)
 	/* protect state */
 	spinlock_t lock;
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
 	union host_hc_status_block	status_blk;
 	/* chip independent shortcuts into sb structure */
@@ -572,7 +572,7 @@
 #define bnx2x_fp_stats(bp, fp)	(&((bp)->fp_stats[(fp)->index]))
 #define bnx2x_fp_qstats(bp, fp)	(&((bp)->fp_stats[(fp)->index].eth_q_stats))
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void bnx2x_fp_init_lock(struct bnx2x_fastpath *fp)
 {
 	spin_lock_init(&fp->lock);
@@ -680,7 +680,7 @@
 {
 	return false;
 }
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
 /* Use 2500 as a mini-jumbo MTU for FCoE */
 #define BNX2X_FCOE_MINI_JUMBO_MTU	2500
@@ -1333,6 +1333,8 @@
 	BNX2X_SP_RTNL_VFPF_CHANNEL_DOWN,
 	BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
 	BNX2X_SP_RTNL_HYPERVISOR_VLAN,
+	BNX2X_SP_RTNL_TX_STOP,
+	BNX2X_SP_RTNL_TX_RESUME,
 };
 
 struct bnx2x_prev_path_list {
@@ -1502,6 +1504,7 @@
 #define BC_SUPPORTS_DCBX_MSG_NON_PMF	(1 << 21)
 #define IS_VF_FLAG			(1 << 22)
 #define INTERRUPTS_ENABLED_FLAG		(1 << 23)
+#define BC_SUPPORTS_RMMOD_CMD		(1 << 24)
 
 #define BP_NOMCP(bp)			((bp)->flags & NO_MCP_FLAG)
 
@@ -1830,6 +1833,8 @@
 
 	int fp_array_size;
 	u32 dump_preset_idx;
+	bool					stats_started;
+	struct semaphore			stats_sema;
 };
 
 /* Tx queues may be less or equal to Rx queues */
@@ -2451,4 +2456,6 @@
 	BNX2X_PCI_LINK_SPEED_5000 = 5000,
 	BNX2X_PCI_LINK_SPEED_8000 = 8000
 };
+
+void bnx2x_set_local_cmng(struct bnx2x *bp);
 #endif /* bnx2x.h */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index ee350bd..f2d1ff1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3117,7 +3117,7 @@
 	return work_done;
 }
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 /* must be called with local_bh_disable()d */
 int bnx2x_low_latency_recv(struct napi_struct *napi)
 {
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
index 0c94df4..fcf2761 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
@@ -30,10 +30,8 @@
 #include "bnx2x_dcb.h"
 
 /* forward declarations of dcbx related functions */
-static int bnx2x_dcbx_stop_hw_tx(struct bnx2x *bp);
 static void bnx2x_pfc_set_pfc(struct bnx2x *bp);
 static void bnx2x_dcbx_update_ets_params(struct bnx2x *bp);
-static int bnx2x_dcbx_resume_hw_tx(struct bnx2x *bp);
 static void bnx2x_dcbx_get_ets_pri_pg_tbl(struct bnx2x *bp,
 					  u32 *set_configuration_ets_pg,
 					  u32 *pri_pg_tbl);
@@ -425,30 +423,52 @@
 		bnx2x_pfc_clear(bp);
 }
 
-static int bnx2x_dcbx_stop_hw_tx(struct bnx2x *bp)
+int bnx2x_dcbx_stop_hw_tx(struct bnx2x *bp)
 {
 	struct bnx2x_func_state_params func_params = {NULL};
+	int rc;
 
 	func_params.f_obj = &bp->func_obj;
 	func_params.cmd = BNX2X_F_CMD_TX_STOP;
 
+	__set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	__set_bit(RAMROD_RETRY, &func_params.ramrod_flags);
+
 	DP(BNX2X_MSG_DCB, "STOP TRAFFIC\n");
-	return bnx2x_func_state_change(bp, &func_params);
+
+	rc = bnx2x_func_state_change(bp, &func_params);
+	if (rc) {
+		BNX2X_ERR("Unable to hold traffic for HW configuration\n");
+		bnx2x_panic();
+	}
+
+	return rc;
 }
 
-static int bnx2x_dcbx_resume_hw_tx(struct bnx2x *bp)
+int bnx2x_dcbx_resume_hw_tx(struct bnx2x *bp)
 {
 	struct bnx2x_func_state_params func_params = {NULL};
 	struct bnx2x_func_tx_start_params *tx_params =
 		&func_params.params.tx_start;
+	int rc;
 
 	func_params.f_obj = &bp->func_obj;
 	func_params.cmd = BNX2X_F_CMD_TX_START;
 
+	__set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
+	__set_bit(RAMROD_RETRY, &func_params.ramrod_flags);
+
 	bnx2x_dcbx_fw_struct(bp, tx_params);
 
 	DP(BNX2X_MSG_DCB, "START TRAFFIC\n");
-	return bnx2x_func_state_change(bp, &func_params);
+
+	rc = bnx2x_func_state_change(bp, &func_params);
+	if (rc) {
+		BNX2X_ERR("Unable to resume traffic after HW configuration\n");
+		bnx2x_panic();
+	}
+
+	return rc;
 }
 
 static void bnx2x_dcbx_2cos_limit_update_ets_config(struct bnx2x *bp)
@@ -744,7 +764,9 @@
 			if (IS_MF(bp))
 				bnx2x_link_sync_notify(bp);
 
-			bnx2x_dcbx_stop_hw_tx(bp);
+			set_bit(BNX2X_SP_RTNL_TX_STOP, &bp->sp_rtnl_state);
+
+			schedule_delayed_work(&bp->sp_rtnl_task, 0);
 
 			return;
 		}
@@ -753,7 +775,13 @@
 		bnx2x_pfc_set_pfc(bp);
 
 		bnx2x_dcbx_update_ets_params(bp);
-		bnx2x_dcbx_resume_hw_tx(bp);
+
+		/* ets may affect cmng configuration: reinit it in hw */
+		bnx2x_set_local_cmng(bp);
+
+		set_bit(BNX2X_SP_RTNL_TX_RESUME, &bp->sp_rtnl_state);
+
+		schedule_delayed_work(&bp->sp_rtnl_task, 0);
 
 		return;
 	case BNX2X_DCBX_STATE_TX_RELEASED:
@@ -2363,21 +2391,24 @@
 		case DCB_FEATCFG_ATTR_PG:
 			if (bp->dcbx_local_feat.ets.enabled)
 				*flags |= DCB_FEATCFG_ENABLE;
-			if (bp->dcbx_error & DCBX_LOCAL_ETS_ERROR)
+			if (bp->dcbx_error & (DCBX_LOCAL_ETS_ERROR |
+					      DCBX_REMOTE_MIB_ERROR))
 				*flags |= DCB_FEATCFG_ERROR;
 			break;
 		case DCB_FEATCFG_ATTR_PFC:
 			if (bp->dcbx_local_feat.pfc.enabled)
 				*flags |= DCB_FEATCFG_ENABLE;
 			if (bp->dcbx_error & (DCBX_LOCAL_PFC_ERROR |
-			    DCBX_LOCAL_PFC_MISMATCH))
+					      DCBX_LOCAL_PFC_MISMATCH |
+					      DCBX_REMOTE_MIB_ERROR))
 				*flags |= DCB_FEATCFG_ERROR;
 			break;
 		case DCB_FEATCFG_ATTR_APP:
 			if (bp->dcbx_local_feat.app.enabled)
 				*flags |= DCB_FEATCFG_ENABLE;
 			if (bp->dcbx_error & (DCBX_LOCAL_APP_ERROR |
-			    DCBX_LOCAL_APP_MISMATCH))
+					      DCBX_LOCAL_APP_MISMATCH |
+					      DCBX_REMOTE_MIB_ERROR))
 				*flags |= DCB_FEATCFG_ERROR;
 			break;
 		default:
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h
index 125bd1b..804b8f6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.h
@@ -199,4 +199,7 @@
 int bnx2x_dcbnl_update_applist(struct bnx2x *bp, bool delall);
 #endif /* BCM_DCBNL */
 
+int bnx2x_dcbx_stop_hw_tx(struct bnx2x *bp);
+int bnx2x_dcbx_resume_hw_tx(struct bnx2x *bp);
+
 #endif /* BNX2X_DCB_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
index 5018e52..32767f6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h
@@ -1300,6 +1300,9 @@
 
 	#define DRV_MSG_CODE_EEE_RESULTS_ACK            0xda000000
 
+	#define DRV_MSG_CODE_RMMOD                      0xdb000000
+	#define REQ_BC_VER_4_RMMOD_CMD                  0x0007080f
+
 	#define DRV_MSG_CODE_SET_MF_BW                  0xe0000000
 	#define REQ_BC_VER_4_SET_MF_BW                  0x00060202
 	#define DRV_MSG_CODE_SET_MF_BW_ACK              0xe1000000
@@ -1372,6 +1375,8 @@
 
 	#define FW_MSG_CODE_EEE_RESULS_ACK              0xda100000
 
+	#define FW_MSG_CODE_RMMOD_ACK                   0xdb100000
+
 	#define FW_MSG_CODE_SET_MF_BW_SENT              0xe0000000
 	#define FW_MSG_CODE_SET_MF_BW_DONE              0xe1000000
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index e5da078..8bdc8b9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -2261,6 +2261,23 @@
 		bp->link_params.req_fc_auto_adv = BNX2X_FLOW_CTRL_BOTH;
 }
 
+static void bnx2x_init_dropless_fc(struct bnx2x *bp)
+{
+	u32 pause_enabled = 0;
+
+	if (!CHIP_IS_E1(bp) && bp->dropless_fc && bp->link_vars.link_up) {
+		if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
+			pause_enabled = 1;
+
+		REG_WR(bp, BAR_USTRORM_INTMEM +
+			   USTORM_ETH_PAUSE_ENABLED_OFFSET(BP_PORT(bp)),
+		       pause_enabled);
+	}
+
+	DP(NETIF_MSG_IFUP | NETIF_MSG_LINK, "dropless_fc is %s\n",
+	   pause_enabled ? "enabled" : "disabled");
+}
+
 int bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode)
 {
 	int rc, cfx_idx = bnx2x_get_link_cfg_idx(bp);
@@ -2294,6 +2311,8 @@
 
 		bnx2x_release_phy_lock(bp);
 
+		bnx2x_init_dropless_fc(bp);
+
 		bnx2x_calc_fc_adv(bp);
 
 		if (bp->link_vars.link_up) {
@@ -2315,6 +2334,8 @@
 		bnx2x_phy_init(&bp->link_params, &bp->link_vars);
 		bnx2x_release_phy_lock(bp);
 
+		bnx2x_init_dropless_fc(bp);
+
 		bnx2x_calc_fc_adv(bp);
 	} else
 		BNX2X_ERR("Bootcode is missing - can not set link\n");
@@ -2476,7 +2497,7 @@
 
 	input.port_rate = bp->link_vars.line_speed;
 
-	if (cmng_type == CMNG_FNS_MINMAX) {
+	if (cmng_type == CMNG_FNS_MINMAX && input.port_rate) {
 		int vn;
 
 		/* read mf conf from shmem */
@@ -2533,6 +2554,21 @@
 	}
 }
 
+/* init cmng mode in HW according to local configuration */
+void bnx2x_set_local_cmng(struct bnx2x *bp)
+{
+	int cmng_fns = bnx2x_get_cmng_fns_mode(bp);
+
+	if (cmng_fns != CMNG_FNS_NONE) {
+		bnx2x_cmng_fns_init(bp, false, cmng_fns);
+		storm_memset_cmng(bp, &bp->cmng, BP_PORT(bp));
+	} else {
+		/* rate shaping and fairness are disabled */
+		DP(NETIF_MSG_IFUP,
+		   "single function mode without fairness\n");
+	}
+}
+
 /* This function is called upon link interrupt */
 static void bnx2x_link_attn(struct bnx2x *bp)
 {
@@ -2541,21 +2577,10 @@
 
 	bnx2x_link_update(&bp->link_params, &bp->link_vars);
 
+	bnx2x_init_dropless_fc(bp);
+
 	if (bp->link_vars.link_up) {
 
-		/* dropless flow control */
-		if (!CHIP_IS_E1(bp) && bp->dropless_fc) {
-			int port = BP_PORT(bp);
-			u32 pause_enabled = 0;
-
-			if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
-				pause_enabled = 1;
-
-			REG_WR(bp, BAR_USTRORM_INTMEM +
-			       USTORM_ETH_PAUSE_ENABLED_OFFSET(port),
-			       pause_enabled);
-		}
-
 		if (bp->link_vars.mac_type != MAC_TYPE_EMAC) {
 			struct host_port_stats *pstats;
 
@@ -2568,17 +2593,8 @@
 			bnx2x_stats_handle(bp, STATS_EVENT_LINK_UP);
 	}
 
-	if (bp->link_vars.link_up && bp->link_vars.line_speed) {
-		int cmng_fns = bnx2x_get_cmng_fns_mode(bp);
-
-		if (cmng_fns != CMNG_FNS_NONE) {
-			bnx2x_cmng_fns_init(bp, false, cmng_fns);
-			storm_memset_cmng(bp, &bp->cmng, BP_PORT(bp));
-		} else
-			/* rate shaping and fairness are disabled */
-			DP(NETIF_MSG_IFUP,
-			   "single function mode without fairness\n");
-	}
+	if (bp->link_vars.link_up && bp->link_vars.line_speed)
+		bnx2x_set_local_cmng(bp);
 
 	__bnx2x_link_report(bp);
 
@@ -9639,6 +9655,12 @@
 			       &bp->sp_rtnl_state))
 		bnx2x_pf_set_vfs_vlan(bp);
 
+	if (test_and_clear_bit(BNX2X_SP_RTNL_TX_STOP, &bp->sp_rtnl_state))
+		bnx2x_dcbx_stop_hw_tx(bp);
+
+	if (test_and_clear_bit(BNX2X_SP_RTNL_TX_RESUME, &bp->sp_rtnl_state))
+		bnx2x_dcbx_resume_hw_tx(bp);
+
 	/* work which needs rtnl lock not-taken (as it takes the lock itself and
 	 * can be called from other contexts as well)
 	 */
@@ -10362,6 +10384,10 @@
 
 	bp->flags |= (val >= REQ_BC_VER_4_DCBX_ADMIN_MSG_NON_PMF) ?
 			BC_SUPPORTS_DCBX_MSG_NON_PMF : 0;
+
+	bp->flags |= (val >= REQ_BC_VER_4_RMMOD_CMD) ?
+			BC_SUPPORTS_RMMOD_CMD : 0;
+
 	boot_mode = SHMEM_RD(bp,
 			dev_info.port_feature_config[BP_PORT(bp)].mba_config) &
 			PORT_FEATURE_MBA_BOOT_AGENT_TYPE_MASK;
@@ -11137,6 +11163,9 @@
 	int tmp;
 	u32 cfg;
 
+	if (IS_VF(bp))
+		return 0;
+
 	if (IS_MF(bp) && !CHIP_IS_E1x(bp)) {
 		/* Take function: tmp = func */
 		tmp = BP_ABS_FUNC(bp);
@@ -11524,6 +11553,7 @@
 	mutex_init(&bp->port.phy_mutex);
 	mutex_init(&bp->fw_mb_mutex);
 	spin_lock_init(&bp->stats_lock);
+	sema_init(&bp->stats_sema, 1);
 
 	INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task);
 	INIT_DELAYED_WORK(&bp->sp_rtnl_task, bnx2x_sp_rtnl_task);
@@ -12026,7 +12056,7 @@
 	.ndo_fcoe_get_wwn	= bnx2x_fcoe_get_wwn,
 #endif
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll		= bnx2x_low_latency_recv,
 #endif
 };
@@ -12817,13 +12847,17 @@
 	bnx2x_dcbnl_update_applist(bp, true);
 #endif
 
+	if (IS_PF(bp) &&
+	    !BP_NOMCP(bp) &&
+	    (bp->flags & BC_SUPPORTS_RMMOD_CMD))
+		bnx2x_fw_command(bp, DRV_MSG_CODE_RMMOD, 0);
+
 	/* Close the interface - either directly or implicitly */
 	if (remove_netdev) {
 		unregister_netdev(dev);
 	} else {
 		rtnl_lock();
-		if (netif_running(dev))
-			bnx2x_close(dev);
+		dev_close(dev);
 		rtnl_unlock();
 	}
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 95861ef..ad83f4b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1747,11 +1747,8 @@
 
 void bnx2x_iov_init_dmae(struct bnx2x *bp)
 {
-	DP(BNX2X_MSG_IOV, "SRIOV is %s\n", IS_SRIOV(bp) ? "ON" : "OFF");
-	if (!IS_SRIOV(bp))
-		return;
-
-	REG_WR(bp, DMAE_REG_BACKWARD_COMP_EN, 0);
+	if (pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
+		REG_WR(bp, DMAE_REG_BACKWARD_COMP_EN, 0);
 }
 
 static int bnx2x_vf_bus(struct bnx2x *bp, int vfid)
@@ -3084,8 +3081,9 @@
 	pci_disable_sriov(bp->pdev);
 }
 
-static int bnx2x_vf_ndo_sanity(struct bnx2x *bp, int vfidx,
-			       struct bnx2x_virtf *vf)
+static int bnx2x_vf_ndo_prep(struct bnx2x *bp, int vfidx,
+			     struct bnx2x_virtf **vf,
+			     struct pf_vf_bulletin_content **bulletin)
 {
 	if (bp->state != BNX2X_STATE_OPEN) {
 		BNX2X_ERR("vf ndo called though PF is down\n");
@@ -3103,12 +3101,22 @@
 		return -EINVAL;
 	}
 
-	if (!vf) {
+	/* init members */
+	*vf = BP_VF(bp, vfidx);
+	*bulletin = BP_VF_BULLETIN(bp, vfidx);
+
+	if (!*vf) {
 		BNX2X_ERR("vf ndo called but vf was null. vfidx was %d\n",
 			  vfidx);
 		return -EINVAL;
 	}
 
+	if (!*bulletin) {
+		BNX2X_ERR("vf ndo called but Bulletin Board struct is null. vfidx was %d\n",
+			  vfidx);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -3116,17 +3124,19 @@
 			struct ifla_vf_info *ivi)
 {
 	struct bnx2x *bp = netdev_priv(dev);
-	struct bnx2x_virtf *vf = BP_VF(bp, vfidx);
-	struct bnx2x_vlan_mac_obj *mac_obj = &bnx2x_vfq(vf, 0, mac_obj);
-	struct bnx2x_vlan_mac_obj *vlan_obj = &bnx2x_vfq(vf, 0, vlan_obj);
-	struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx);
+	struct bnx2x_virtf *vf = NULL;
+	struct pf_vf_bulletin_content *bulletin = NULL;
+	struct bnx2x_vlan_mac_obj *mac_obj;
+	struct bnx2x_vlan_mac_obj *vlan_obj;
 	int rc;
 
-	/* sanity */
-	rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf);
+	/* sanity and init */
+	rc = bnx2x_vf_ndo_prep(bp, vfidx, &vf, &bulletin);
 	if (rc)
 		return rc;
-	if (!mac_obj || !vlan_obj || !bulletin) {
+	mac_obj = &bnx2x_vfq(vf, 0, mac_obj);
+	vlan_obj = &bnx2x_vfq(vf, 0, vlan_obj);
+	if (!mac_obj || !vlan_obj) {
 		BNX2X_ERR("VF partially initialized\n");
 		return -EINVAL;
 	}
@@ -3183,11 +3193,11 @@
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	int rc, q_logical_state;
-	struct bnx2x_virtf *vf = BP_VF(bp, vfidx);
-	struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx);
+	struct bnx2x_virtf *vf = NULL;
+	struct pf_vf_bulletin_content *bulletin = NULL;
 
-	/* sanity */
-	rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf);
+	/* sanity and init */
+	rc = bnx2x_vf_ndo_prep(bp, vfidx, &vf, &bulletin);
 	if (rc)
 		return rc;
 	if (!is_valid_ether_addr(mac)) {
@@ -3249,11 +3259,11 @@
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	int rc, q_logical_state;
-	struct bnx2x_virtf *vf = BP_VF(bp, vfidx);
-	struct pf_vf_bulletin_content *bulletin = BP_VF_BULLETIN(bp, vfidx);
+	struct bnx2x_virtf *vf = NULL;
+	struct pf_vf_bulletin_content *bulletin = NULL;
 
-	/* sanity */
-	rc = bnx2x_vf_ndo_sanity(bp, vfidx, vf);
+	/* sanity and init */
+	rc = bnx2x_vf_ndo_prep(bp, vfidx, &vf, &bulletin);
 	if (rc)
 		return rc;
 
@@ -3463,7 +3473,7 @@
 alloc_mem_err:
 	BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->vf2pf_mbox_mapping,
 		       sizeof(struct bnx2x_vf_mbx_msg));
-	BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->vf2pf_mbox_mapping,
+	BNX2X_PCI_FREE(bp->vf2pf_mbox, bp->pf2vf_bulletin_mapping,
 		       sizeof(union pf_vf_bulletin));
 	return -ENOMEM;
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
index 98366ab..d63d132 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
@@ -221,7 +221,8 @@
  * Statistics service functions
  */
 
-static void bnx2x_stats_pmf_update(struct bnx2x *bp)
+/* should be called under stats_sema */
+static void __bnx2x_stats_pmf_update(struct bnx2x *bp)
 {
 	struct dmae_command *dmae;
 	u32 opcode;
@@ -518,7 +519,8 @@
 	*stats_comp = 0;
 }
 
-static void bnx2x_stats_start(struct bnx2x *bp)
+/* should be called under stats_sema */
+static void __bnx2x_stats_start(struct bnx2x *bp)
 {
 	/* vfs travel through here as part of the statistics FSM, but no action
 	 * is required
@@ -534,13 +536,34 @@
 
 	bnx2x_hw_stats_post(bp);
 	bnx2x_storm_stats_post(bp);
+
+	bp->stats_started = true;
+}
+
+static void bnx2x_stats_start(struct bnx2x *bp)
+{
+	if (down_timeout(&bp->stats_sema, HZ/10))
+		BNX2X_ERR("Unable to acquire stats lock\n");
+	__bnx2x_stats_start(bp);
+	up(&bp->stats_sema);
 }
 
 static void bnx2x_stats_pmf_start(struct bnx2x *bp)
 {
+	if (down_timeout(&bp->stats_sema, HZ/10))
+		BNX2X_ERR("Unable to acquire stats lock\n");
 	bnx2x_stats_comp(bp);
-	bnx2x_stats_pmf_update(bp);
-	bnx2x_stats_start(bp);
+	__bnx2x_stats_pmf_update(bp);
+	__bnx2x_stats_start(bp);
+	up(&bp->stats_sema);
+}
+
+static void bnx2x_stats_pmf_update(struct bnx2x *bp)
+{
+	if (down_timeout(&bp->stats_sema, HZ/10))
+		BNX2X_ERR("Unable to acquire stats lock\n");
+	__bnx2x_stats_pmf_update(bp);
+	up(&bp->stats_sema);
 }
 
 static void bnx2x_stats_restart(struct bnx2x *bp)
@@ -550,8 +573,11 @@
 	 */
 	if (IS_VF(bp))
 		return;
+	if (down_timeout(&bp->stats_sema, HZ/10))
+		BNX2X_ERR("Unable to acquire stats lock\n");
 	bnx2x_stats_comp(bp);
-	bnx2x_stats_start(bp);
+	__bnx2x_stats_start(bp);
+	up(&bp->stats_sema);
 }
 
 static void bnx2x_bmac_stats_update(struct bnx2x *bp)
@@ -888,9 +914,7 @@
 	/* Make sure we use the value of the counter
 	 * used for sending the last stats ramrod.
 	 */
-	spin_lock_bh(&bp->stats_lock);
 	cur_stats_counter = bp->stats_counter - 1;
-	spin_unlock_bh(&bp->stats_lock);
 
 	/* are storm stats valid? */
 	if (le16_to_cpu(counters->xstats_counter) != cur_stats_counter) {
@@ -1227,12 +1251,18 @@
 {
 	u32 *stats_comp = bnx2x_sp(bp, stats_comp);
 
-	if (bnx2x_edebug_stats_stopped(bp))
+	/* we run update from timer context, so give up
+	 * if somebody is in the middle of transition
+	 */
+	if (down_trylock(&bp->stats_sema))
 		return;
 
+	if (bnx2x_edebug_stats_stopped(bp) || !bp->stats_started)
+		goto out;
+
 	if (IS_PF(bp)) {
 		if (*stats_comp != DMAE_COMP_VAL)
-			return;
+			goto out;
 
 		if (bp->port.pmf)
 			bnx2x_hw_stats_update(bp);
@@ -1242,7 +1272,7 @@
 				BNX2X_ERR("storm stats were not updated for 3 times\n");
 				bnx2x_panic();
 			}
-			return;
+			goto out;
 		}
 	} else {
 		/* vf doesn't collect HW statistics, and doesn't get completions
@@ -1256,7 +1286,7 @@
 
 	/* vf is done */
 	if (IS_VF(bp))
-		return;
+		goto out;
 
 	if (netif_msg_timer(bp)) {
 		struct bnx2x_eth_stats *estats = &bp->eth_stats;
@@ -1267,6 +1297,9 @@
 
 	bnx2x_hw_stats_post(bp);
 	bnx2x_storm_stats_post(bp);
+
+out:
+	up(&bp->stats_sema);
 }
 
 static void bnx2x_port_stats_stop(struct bnx2x *bp)
@@ -1332,6 +1365,11 @@
 {
 	int update = 0;
 
+	if (down_timeout(&bp->stats_sema, HZ/10))
+		BNX2X_ERR("Unable to acquire stats lock\n");
+
+	bp->stats_started = false;
+
 	bnx2x_stats_comp(bp);
 
 	if (bp->port.pmf)
@@ -1348,6 +1386,8 @@
 		bnx2x_hw_stats_post(bp);
 		bnx2x_stats_comp(bp);
 	}
+
+	up(&bp->stats_sema);
 }
 
 static void bnx2x_stats_do_nothing(struct bnx2x *bp)
@@ -1376,15 +1416,17 @@
 void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
 {
 	enum bnx2x_stats_state state;
+	void (*action)(struct bnx2x *bp);
 	if (unlikely(bp->panic))
 		return;
 
 	spin_lock_bh(&bp->stats_lock);
 	state = bp->stats_state;
 	bp->stats_state = bnx2x_stats_stm[state][event].next_state;
+	action = bnx2x_stats_stm[state][event].action;
 	spin_unlock_bh(&bp->stats_lock);
 
-	bnx2x_stats_stm[state][event].action(bp);
+	action(bp);
 
 	if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp))
 		DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index d964f30..0da2214 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -17625,7 +17625,8 @@
 	pci_release_regions(pdev);
 
 err_out_disable_pdev:
-	pci_disable_device(pdev);
+	if (pci_is_enabled(pdev))
+		pci_disable_device(pdev);
 	pci_set_drvdata(pdev, NULL);
 	return err;
 }
@@ -17773,7 +17774,8 @@
 
 	rtnl_lock();
 
-	if (!netif_running(netdev))
+	/* We probably don't have netdev yet */
+	if (!netdev || !netif_running(netdev))
 		goto done;
 
 	tg3_phy_stop(tp);
@@ -17794,8 +17796,10 @@
 
 done:
 	if (state == pci_channel_io_perm_failure) {
-		tg3_napi_enable(tp);
-		dev_close(netdev);
+		if (netdev) {
+			tg3_napi_enable(tp);
+			dev_close(netdev);
+		}
 		err = PCI_ERS_RESULT_DISCONNECT;
 	} else {
 		pci_disable_device(pdev);
@@ -17825,7 +17829,8 @@
 	rtnl_lock();
 
 	if (pci_enable_device(pdev)) {
-		netdev_err(netdev, "Cannot re-enable PCI device after reset.\n");
+		dev_err(&pdev->dev,
+			"Cannot re-enable PCI device after reset.\n");
 		goto done;
 	}
 
@@ -17833,7 +17838,7 @@
 	pci_restore_state(pdev);
 	pci_save_state(pdev);
 
-	if (!netif_running(netdev)) {
+	if (!netdev || !netif_running(netdev)) {
 		rc = PCI_ERS_RESULT_RECOVERED;
 		goto done;
 	}
@@ -17845,7 +17850,7 @@
 	rc = PCI_ERS_RESULT_RECOVERED;
 
 done:
-	if (rc != PCI_ERS_RESULT_RECOVERED && netif_running(netdev)) {
+	if (rc != PCI_ERS_RESULT_RECOVERED && netdev && netif_running(netdev)) {
 		tg3_napi_enable(tp);
 		dev_close(netdev);
 	}
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index 687ec4a..9c89dc8 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -455,11 +455,6 @@
 		q->pg_chunk.offset = 0;
 		mapping = pci_map_page(adapter->pdev, q->pg_chunk.page,
 				       0, q->alloc_size, PCI_DMA_FROMDEVICE);
-		if (unlikely(pci_dma_mapping_error(adapter->pdev, mapping))) {
-			__free_pages(q->pg_chunk.page, order);
-			q->pg_chunk.page = NULL;
-			return -EIO;
-		}
 		q->pg_chunk.mapping = mapping;
 	}
 	sd->pg_chunk = q->pg_chunk;
@@ -954,75 +949,40 @@
 	return flits_to_desc(flits);
 }
 
-
-/*	map_skb - map a packet main body and its page fragments
- *	@pdev: the PCI device
- *	@skb: the packet
- *	@addr: placeholder to save the mapped addresses
- *
- *	map the main body of an sk_buff and its page fragments, if any.
- */
-static int map_skb(struct pci_dev *pdev, const struct sk_buff *skb,
-		   dma_addr_t *addr)
-{
-	const skb_frag_t *fp, *end;
-	const struct skb_shared_info *si;
-
-	*addr = pci_map_single(pdev, skb->data, skb_headlen(skb),
-			       PCI_DMA_TODEVICE);
-	if (pci_dma_mapping_error(pdev, *addr))
-		goto out_err;
-
-	si = skb_shinfo(skb);
-	end = &si->frags[si->nr_frags];
-
-	for (fp = si->frags; fp < end; fp++) {
-		*++addr = skb_frag_dma_map(&pdev->dev, fp, 0, skb_frag_size(fp),
-					   DMA_TO_DEVICE);
-		if (pci_dma_mapping_error(pdev, *addr))
-			goto unwind;
-	}
-	return 0;
-
-unwind:
-	while (fp-- > si->frags)
-		dma_unmap_page(&pdev->dev, *--addr, skb_frag_size(fp),
-			       DMA_TO_DEVICE);
-
-	pci_unmap_single(pdev, addr[-1], skb_headlen(skb), PCI_DMA_TODEVICE);
-out_err:
-	return -ENOMEM;
-}
-
 /**
- *	write_sgl - populate a scatter/gather list for a packet
+ *	make_sgl - populate a scatter/gather list for a packet
  *	@skb: the packet
  *	@sgp: the SGL to populate
  *	@start: start address of skb main body data to include in the SGL
  *	@len: length of skb main body data to include in the SGL
- *	@addr: the list of the mapped addresses
+ *	@pdev: the PCI device
  *
- *	Copies the scatter/gather list for the buffers that make up a packet
+ *	Generates a scatter/gather list for the buffers that make up a packet
  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
  *	appropriately.
  */
-static inline unsigned int write_sgl(const struct sk_buff *skb,
+static inline unsigned int make_sgl(const struct sk_buff *skb,
 				    struct sg_ent *sgp, unsigned char *start,
-				    unsigned int len, const dma_addr_t *addr)
+				    unsigned int len, struct pci_dev *pdev)
 {
-	unsigned int i, j = 0, k = 0, nfrags;
+	dma_addr_t mapping;
+	unsigned int i, j = 0, nfrags;
 
 	if (len) {
+		mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
 		sgp->len[0] = cpu_to_be32(len);
-		sgp->addr[j++] = cpu_to_be64(addr[k++]);
+		sgp->addr[0] = cpu_to_be64(mapping);
+		j = 1;
 	}
 
 	nfrags = skb_shinfo(skb)->nr_frags;
 	for (i = 0; i < nfrags; i++) {
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
+		mapping = skb_frag_dma_map(&pdev->dev, frag, 0, skb_frag_size(frag),
+					   DMA_TO_DEVICE);
 		sgp->len[j] = cpu_to_be32(skb_frag_size(frag));
-		sgp->addr[j] = cpu_to_be64(addr[k++]);
+		sgp->addr[j] = cpu_to_be64(mapping);
 		j ^= 1;
 		if (j == 0)
 			++sgp;
@@ -1178,7 +1138,7 @@
 			    const struct port_info *pi,
 			    unsigned int pidx, unsigned int gen,
 			    struct sge_txq *q, unsigned int ndesc,
-			    unsigned int compl, const dma_addr_t *addr)
+			    unsigned int compl)
 {
 	unsigned int flits, sgl_flits, cntrl, tso_info;
 	struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
@@ -1236,7 +1196,7 @@
 	}
 
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = write_sgl(skb, sgp, skb->data, skb_headlen(skb), addr);
+	sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
 
 	write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
 			 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
@@ -1267,7 +1227,6 @@
 	struct netdev_queue *txq;
 	struct sge_qset *qs;
 	struct sge_txq *q;
-	dma_addr_t addr[MAX_SKB_FRAGS + 1];
 
 	/*
 	 * The chip min packet length is 9 octets but play safe and reject
@@ -1296,11 +1255,6 @@
 		return NETDEV_TX_BUSY;
 	}
 
-	if (unlikely(map_skb(adap->pdev, skb, addr) < 0)) {
-		dev_kfree_skb(skb);
-		return NETDEV_TX_OK;
-	}
-
 	q->in_use += ndesc;
 	if (unlikely(credits - ndesc < q->stop_thres)) {
 		t3_stop_tx_queue(txq, qs, q);
@@ -1358,7 +1312,7 @@
 	if (likely(!skb_shared(skb)))
 		skb_orphan(skb);
 
-	write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl, addr);
+	write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
 	check_ring_tx_db(adap, q);
 	return NETDEV_TX_OK;
 }
@@ -1623,8 +1577,7 @@
  */
 static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
 			  struct sge_txq *q, unsigned int pidx,
-			  unsigned int gen, unsigned int ndesc,
-			  const dma_addr_t *addr)
+			  unsigned int gen, unsigned int ndesc)
 {
 	unsigned int sgl_flits, flits;
 	struct work_request_hdr *from;
@@ -1645,9 +1598,9 @@
 
 	flits = skb_transport_offset(skb) / 8;
 	sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
-	sgl_flits = write_sgl(skb, sgp, skb_transport_header(skb),
-			     skb_tail_pointer(skb) -
-			     skb_transport_header(skb), addr);
+	sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
+			     skb->tail - skb->transport_header,
+			     adap->pdev);
 	if (need_skb_unmap()) {
 		setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
 		skb->destructor = deferred_unmap_destructor;
@@ -1705,11 +1658,6 @@
 		goto again;
 	}
 
-	if (map_skb(adap->pdev, skb, (dma_addr_t *)skb->head)) {
-		spin_unlock(&q->lock);
-		return NET_XMIT_SUCCESS;
-	}
-
 	gen = q->gen;
 	q->in_use += ndesc;
 	pidx = q->pidx;
@@ -1720,7 +1668,7 @@
 	}
 	spin_unlock(&q->lock);
 
-	write_ofld_wr(adap, skb, q, pidx, gen, ndesc, (dma_addr_t *)skb->head);
+	write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
 	check_ring_tx_db(adap, q);
 	return NET_XMIT_SUCCESS;
 }
@@ -1738,7 +1686,6 @@
 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
 	const struct port_info *pi = netdev_priv(qs->netdev);
 	struct adapter *adap = pi->adapter;
-	unsigned int written = 0;
 
 	spin_lock(&q->lock);
 again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
@@ -1758,14 +1705,10 @@
 			break;
 		}
 
-		if (map_skb(adap->pdev, skb, (dma_addr_t *)skb->head))
-			break;
-
 		gen = q->gen;
 		q->in_use += ndesc;
 		pidx = q->pidx;
 		q->pidx += ndesc;
-		written += ndesc;
 		if (q->pidx >= q->size) {
 			q->pidx -= q->size;
 			q->gen ^= 1;
@@ -1773,8 +1716,7 @@
 		__skb_unlink(skb, &q->sendq);
 		spin_unlock(&q->lock);
 
-		write_ofld_wr(adap, skb, q, pidx, gen, ndesc,
-			     (dma_addr_t *)skb->head);
+		write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
 		spin_lock(&q->lock);
 	}
 	spin_unlock(&q->lock);
@@ -1784,9 +1726,8 @@
 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
 #endif
 	wmb();
-	if (likely(written))
-		t3_write_reg(adap, A_SG_KDOORBELL,
-			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+	t3_write_reg(adap, A_SG_KDOORBELL,
+		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 }
 
 /**
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 6e6e0a1..8ec5d74 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -3048,6 +3048,9 @@
 
 		adapter->max_event_queues = le16_to_cpu(desc->eq_count);
 		adapter->if_cap_flags = le32_to_cpu(desc->cap_flags);
+
+		/* Clear flags that driver is not interested in */
+		adapter->if_cap_flags &=  BE_IF_CAP_FLAGS_WANT;
 	}
 err:
 	mutex_unlock(&adapter->mbox_lock);
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h
index 5228d88..1b3b9e8 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.h
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.h
@@ -563,6 +563,12 @@
 	BE_IF_FLAGS_MULTICAST = 0x1000
 };
 
+#define BE_IF_CAP_FLAGS_WANT (BE_IF_FLAGS_RSS | BE_IF_FLAGS_PROMISCUOUS |\
+			 BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_VLAN_PROMISCUOUS |\
+			 BE_IF_FLAGS_VLAN | BE_IF_FLAGS_MCAST_PROMISCUOUS |\
+			 BE_IF_FLAGS_PASS_L3L4_ERRORS | BE_IF_FLAGS_MULTICAST |\
+			 BE_IF_FLAGS_UNTAGGED)
+
 /* An RX interface is an object with one or more MAC addresses and
  * filtering capabilities. */
 struct be_cmd_req_if_create {
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 181edb5..4559c35 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2563,8 +2563,8 @@
 	/* Wait for all pending tx completions to arrive so that
 	 * all tx skbs are freed.
 	 */
-	be_tx_compl_clean(adapter);
 	netif_tx_disable(netdev);
+	be_tx_compl_clean(adapter);
 
 	be_rx_qs_destroy(adapter);
 
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 2b0a0ea..ae23600 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -259,6 +259,7 @@
 struct fec_enet_delayed_work {
 	struct delayed_work delay_work;
 	bool timeout;
+	bool trig_tx;
 };
 
 /* The FEC buffer descriptors track the ring buffers.  The rx_bd_base and
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index d3ad5ea..77ea0db 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -93,6 +93,20 @@
 #define FEC_QUIRK_HAS_CSUM		(1 << 5)
 /* Controller has hardware vlan support */
 #define FEC_QUIRK_HAS_VLAN		(1 << 6)
+/* ENET IP errata ERR006358
+ *
+ * If the ready bit in the transmit buffer descriptor (TxBD[R]) is previously
+ * detected as not set during a prior frame transmission, then the
+ * ENET_TDAR[TDAR] bit is cleared at a later time, even if additional TxBDs
+ * were added to the ring and the ENET_TDAR[TDAR] bit is set. This results in
+ * If the ready bit in the transmit buffer descriptor (TxBD[R]) is previously
+ * detected as not set during a prior frame transmission, then the
+ * ENET_TDAR[TDAR] bit is cleared at a later time, even if additional TxBDs
+ * were added to the ring and the ENET_TDAR[TDAR] bit is set. This results in
+ * frames not being transmitted until there is a 0-to-1 transition on
+ * ENET_TDAR[TDAR].
+ */
+#define FEC_QUIRK_ERR006358            (1 << 7)
 
 static struct platform_device_id fec_devtype[] = {
 	{
@@ -112,7 +126,7 @@
 		.name = "imx6q-fec",
 		.driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT |
 				FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
-				FEC_QUIRK_HAS_VLAN,
+				FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358,
 	}, {
 		.name = "mvf600-fec",
 		.driver_data = FEC_QUIRK_ENET_MAC,
@@ -275,16 +289,11 @@
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	const struct platform_device_id *id_entry =
 				platform_get_device_id(fep->pdev);
-	struct bufdesc *bdp;
+	struct bufdesc *bdp, *bdp_pre;
 	void *bufaddr;
 	unsigned short	status;
 	unsigned int index;
 
-	if (!fep->link) {
-		/* Link is down or auto-negotiation is in progress. */
-		return NETDEV_TX_BUSY;
-	}
-
 	/* Fill in a Tx ring entry */
 	bdp = fep->cur_tx;
 
@@ -370,6 +379,15 @@
 				ebdp->cbd_esc |= BD_ENET_TX_PINS;
 		}
 	}
+
+	bdp_pre = fec_enet_get_prevdesc(bdp, fep->bufdesc_ex);
+	if ((id_entry->driver_data & FEC_QUIRK_ERR006358) &&
+	    !(bdp_pre->cbd_sc & BD_ENET_TX_READY)) {
+		fep->delay_work.trig_tx = true;
+		schedule_delayed_work(&(fep->delay_work.delay_work),
+					msecs_to_jiffies(1));
+	}
+
 	/* If this was the last BD in the ring, start at the beginning again. */
 	if (status & BD_ENET_TX_WRAP)
 		bdp = fep->tx_bd_base;
@@ -689,6 +707,11 @@
 		fec_restart(fep->netdev, fep->full_duplex);
 		netif_wake_queue(fep->netdev);
 	}
+
+	if (fep->delay_work.trig_tx) {
+		fep->delay_work.trig_tx = false;
+		writel(0, fep->hwp + FEC_X_DES_ACTIVE);
+	}
 }
 
 static void
@@ -2279,4 +2302,5 @@
 
 module_platform_driver(fec_driver);
 
+MODULE_ALIAS("platform:"DRIVER_NAME);
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 6a0c1b6..c1d72c0 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -3739,9 +3739,8 @@
 	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
 
 	if (netdev->flags & IFF_PROMISC) {
-		u32 mrqc = rd32(E1000_MRQC);
 		/* retain VLAN HW filtering if in VT mode */
-		if (mrqc & E1000_MRQC_ENABLE_VMDQ)
+		if (adapter->vfs_allocated_count)
 			rctl |= E1000_RCTL_VFE;
 		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
 		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 7be725c..a6494e5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -54,7 +54,7 @@
 
 #include <net/busy_poll.h>
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 #define LL_EXTENDED_STATS
 #endif
 /* common prefix used by pr_<> macros */
@@ -366,7 +366,7 @@
 	struct rcu_head rcu;	/* to avoid race with update stats on free */
 	char name[IFNAMSIZ + 9];
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int state;
 #define IXGBE_QV_STATE_IDLE        0
 #define IXGBE_QV_STATE_NAPI	   1    /* NAPI owns this QV */
@@ -377,12 +377,12 @@
 #define IXGBE_QV_YIELD (IXGBE_QV_STATE_NAPI_YIELD | IXGBE_QV_STATE_POLL_YIELD)
 #define IXGBE_QV_USER_PEND (IXGBE_QV_STATE_POLL | IXGBE_QV_STATE_POLL_YIELD)
 	spinlock_t lock;
-#endif  /* CONFIG_NET_LL_RX_POLL */
+#endif  /* CONFIG_NET_RX_BUSY_POLL */
 
 	/* for dynamic allocation of rings associated with this q_vector */
 	struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
 };
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector)
 {
 
@@ -462,7 +462,7 @@
 	WARN_ON(!(q_vector->state & IXGBE_QV_LOCKED));
 	return q_vector->state & IXGBE_QV_USER_PEND;
 }
-#else /* CONFIG_NET_LL_RX_POLL */
+#else /* CONFIG_NET_RX_BUSY_POLL */
 static inline void ixgbe_qv_init_lock(struct ixgbe_q_vector *q_vector)
 {
 }
@@ -491,7 +491,7 @@
 {
 	return false;
 }
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
 #ifdef CONFIG_IXGBE_HWMON
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
index ac78077..7a77f37 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c
@@ -108,9 +108,8 @@
 
 	/* Enable arbiter */
 	reg &= ~IXGBE_DPMCS_ARBDIS;
-	/* Enable DFP and Recycle mode */
-	reg |= (IXGBE_DPMCS_TDPAC | IXGBE_DPMCS_TRM);
 	reg |= IXGBE_DPMCS_TSOEF;
+
 	/* Configure Max TSO packet size 34KB including payload and headers */
 	reg |= (0x4 << IXGBE_DPMCS_MTSOS_SHIFT);
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index bad8f14..be4b1fb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1998,7 +1998,7 @@
 	return total_rx_packets;
 }
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 /* must be called with local_bh_disable()d */
 static int ixgbe_low_latency_recv(struct napi_struct *napi)
 {
@@ -2030,7 +2030,7 @@
 
 	return found;
 }
-#endif	/* CONFIG_NET_LL_RX_POLL */
+#endif	/* CONFIG_NET_RX_BUSY_POLL */
 
 /**
  * ixgbe_configure_msix - Configure MSI-X hardware
@@ -7227,7 +7227,7 @@
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= ixgbe_netpoll,
 #endif
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll		= ixgbe_low_latency_recv,
 #endif
 #ifdef IXGBE_FCOE
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 712779f..b017818 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -88,6 +88,8 @@
 #define      MVNETA_TX_IN_PRGRS                  BIT(1)
 #define      MVNETA_TX_FIFO_EMPTY                BIT(8)
 #define MVNETA_RX_MIN_FRAME_SIZE                 0x247c
+#define MVNETA_SGMII_SERDES_CFG			 0x24A0
+#define      MVNETA_SGMII_SERDES_PROTO		 0x0cc7
 #define MVNETA_TYPE_PRIO                         0x24bc
 #define      MVNETA_FORCE_UNI                    BIT(21)
 #define MVNETA_TXQ_CMD_1                         0x24e4
@@ -655,6 +657,8 @@
 	val = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
 	val |= MVNETA_GMAC2_PSC_ENABLE;
 	mvreg_write(pp, MVNETA_GMAC_CTRL_2, val);
+
+	mvreg_write(pp, MVNETA_SGMII_SERDES_CFG, MVNETA_SGMII_SERDES_PROTO);
 }
 
 /* Start the Ethernet port RX and TX activity */
@@ -2728,28 +2732,24 @@
 
 	pp = netdev_priv(dev);
 
-	pp->tx_done_timer.function = mvneta_tx_done_timer_callback;
-	init_timer(&pp->tx_done_timer);
-	clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
-
 	pp->weight = MVNETA_RX_POLL_WEIGHT;
 	pp->phy_node = phy_node;
 	pp->phy_interface = phy_mode;
 
-	pp->base = of_iomap(dn, 0);
-	if (pp->base == NULL) {
-		err = -ENOMEM;
-		goto err_free_irq;
-	}
-
 	pp->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pp->clk)) {
 		err = PTR_ERR(pp->clk);
-		goto err_unmap;
+		goto err_free_irq;
 	}
 
 	clk_prepare_enable(pp->clk);
 
+	pp->base = of_iomap(dn, 0);
+	if (pp->base == NULL) {
+		err = -ENOMEM;
+		goto err_clk;
+	}
+
 	dt_mac_addr = of_get_mac_address(dn);
 	if (dt_mac_addr && is_valid_ether_addr(dt_mac_addr)) {
 		mac_from = "device tree";
@@ -2766,6 +2766,9 @@
 	}
 
 	pp->tx_done_timer.data = (unsigned long)dev;
+	pp->tx_done_timer.function = mvneta_tx_done_timer_callback;
+	init_timer(&pp->tx_done_timer);
+	clear_bit(MVNETA_F_TX_DONE_TIMER_BIT, &pp->flags);
 
 	pp->tx_ring_size = MVNETA_MAX_TXD;
 	pp->rx_ring_size = MVNETA_MAX_RXD;
@@ -2776,7 +2779,7 @@
 	err = mvneta_init(pp, phy_addr);
 	if (err < 0) {
 		dev_err(&pdev->dev, "can't init eth hal\n");
-		goto err_clk;
+		goto err_unmap;
 	}
 	mvneta_port_power_up(pp, phy_mode);
 
@@ -2806,10 +2809,10 @@
 
 err_deinit:
 	mvneta_deinit(pp);
-err_clk:
-	clk_disable_unprepare(pp->clk);
 err_unmap:
 	iounmap(pp->base);
+err_clk:
+	clk_disable_unprepare(pp->clk);
 err_free_irq:
 	irq_dispose_mapping(dev->irq);
 err_free_netdev:
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index c896079..ef94a59 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -931,17 +931,20 @@
 }
 
 /* Allocate and setup a new buffer for receiving */
-static void skge_rx_setup(struct skge_port *skge, struct skge_element *e,
-			  struct sk_buff *skb, unsigned int bufsize)
+static int skge_rx_setup(struct skge_port *skge, struct skge_element *e,
+			 struct sk_buff *skb, unsigned int bufsize)
 {
 	struct skge_rx_desc *rd = e->desc;
-	u64 map;
+	dma_addr_t map;
 
 	map = pci_map_single(skge->hw->pdev, skb->data, bufsize,
 			     PCI_DMA_FROMDEVICE);
 
-	rd->dma_lo = map;
-	rd->dma_hi = map >> 32;
+	if (pci_dma_mapping_error(skge->hw->pdev, map))
+		return -1;
+
+	rd->dma_lo = lower_32_bits(map);
+	rd->dma_hi = upper_32_bits(map);
 	e->skb = skb;
 	rd->csum1_start = ETH_HLEN;
 	rd->csum2_start = ETH_HLEN;
@@ -953,6 +956,7 @@
 	rd->control = BMU_OWN | BMU_STF | BMU_IRQ_EOF | BMU_TCP_CHECK | bufsize;
 	dma_unmap_addr_set(e, mapaddr, map);
 	dma_unmap_len_set(e, maplen, bufsize);
+	return 0;
 }
 
 /* Resume receiving using existing skb,
@@ -1014,7 +1018,10 @@
 			return -ENOMEM;
 
 		skb_reserve(skb, NET_IP_ALIGN);
-		skge_rx_setup(skge, e, skb, skge->rx_buf_size);
+		if (skge_rx_setup(skge, e, skb, skge->rx_buf_size) < 0) {
+			dev_kfree_skb(skb);
+			return -EIO;
+		}
 	} while ((e = e->next) != ring->start);
 
 	ring->to_clean = ring->start;
@@ -2544,7 +2551,7 @@
 
 	BUG_ON(skge->dma & 7);
 
-	if ((u64)skge->dma >> 32 != ((u64) skge->dma + skge->mem_size) >> 32) {
+	if (upper_32_bits(skge->dma) != upper_32_bits(skge->dma + skge->mem_size)) {
 		dev_err(&hw->pdev->dev, "pci_alloc_consistent region crosses 4G boundary\n");
 		err = -EINVAL;
 		goto free_pci_mem;
@@ -2729,7 +2736,7 @@
 	struct skge_tx_desc *td;
 	int i;
 	u32 control, len;
-	u64 map;
+	dma_addr_t map;
 
 	if (skb_padto(skb, ETH_ZLEN))
 		return NETDEV_TX_OK;
@@ -2743,11 +2750,14 @@
 	e->skb = skb;
 	len = skb_headlen(skb);
 	map = pci_map_single(hw->pdev, skb->data, len, PCI_DMA_TODEVICE);
+	if (pci_dma_mapping_error(hw->pdev, map))
+		goto mapping_error;
+
 	dma_unmap_addr_set(e, mapaddr, map);
 	dma_unmap_len_set(e, maplen, len);
 
-	td->dma_lo = map;
-	td->dma_hi = map >> 32;
+	td->dma_lo = lower_32_bits(map);
+	td->dma_hi = upper_32_bits(map);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		const int offset = skb_checksum_start_offset(skb);
@@ -2778,14 +2788,16 @@
 
 			map = skb_frag_dma_map(&hw->pdev->dev, frag, 0,
 					       skb_frag_size(frag), DMA_TO_DEVICE);
+			if (dma_mapping_error(&hw->pdev->dev, map))
+				goto mapping_unwind;
 
 			e = e->next;
 			e->skb = skb;
 			tf = e->desc;
 			BUG_ON(tf->control & BMU_OWN);
 
-			tf->dma_lo = map;
-			tf->dma_hi = (u64) map >> 32;
+			tf->dma_lo = lower_32_bits(map);
+			tf->dma_hi = upper_32_bits(map);
 			dma_unmap_addr_set(e, mapaddr, map);
 			dma_unmap_len_set(e, maplen, skb_frag_size(frag));
 
@@ -2815,6 +2827,26 @@
 	}
 
 	return NETDEV_TX_OK;
+
+mapping_unwind:
+	e = skge->tx_ring.to_use;
+	pci_unmap_single(hw->pdev,
+			 dma_unmap_addr(e, mapaddr),
+			 dma_unmap_len(e, maplen),
+			 PCI_DMA_TODEVICE);
+	while (i-- > 0) {
+		e = e->next;
+		pci_unmap_page(hw->pdev,
+			       dma_unmap_addr(e, mapaddr),
+			       dma_unmap_len(e, maplen),
+			       PCI_DMA_TODEVICE);
+	}
+
+mapping_error:
+	if (net_ratelimit())
+		dev_warn(&hw->pdev->dev, "%s: tx mapping error\n", dev->name);
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
 }
 
 
@@ -3045,11 +3077,13 @@
 
 		pci_dma_sync_single_for_cpu(skge->hw->pdev,
 					    dma_unmap_addr(e, mapaddr),
-					    len, PCI_DMA_FROMDEVICE);
+					    dma_unmap_len(e, maplen),
+					    PCI_DMA_FROMDEVICE);
 		skb_copy_from_linear_data(e->skb, skb->data, len);
 		pci_dma_sync_single_for_device(skge->hw->pdev,
 					       dma_unmap_addr(e, mapaddr),
-					       len, PCI_DMA_FROMDEVICE);
+					       dma_unmap_len(e, maplen),
+					       PCI_DMA_FROMDEVICE);
 		skge_rx_reuse(e, skge->rx_buf_size);
 	} else {
 		struct sk_buff *nskb;
@@ -3058,13 +3092,17 @@
 		if (!nskb)
 			goto resubmit;
 
+		if (skge_rx_setup(skge, e, nskb, skge->rx_buf_size) < 0) {
+			dev_kfree_skb(nskb);
+			goto resubmit;
+		}
+
 		pci_unmap_single(skge->hw->pdev,
 				 dma_unmap_addr(e, mapaddr),
 				 dma_unmap_len(e, maplen),
 				 PCI_DMA_FROMDEVICE);
 		skb = e->skb;
 		prefetch(skb->data);
-		skge_rx_setup(skge, e, nskb, skge->rx_buf_size);
 	}
 
 	skb_put(skb, len);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 727874f..a28cd80 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -223,7 +223,7 @@
 	case ETH_SS_STATS:
 		return (priv->stats_bitmap ? bit_count : NUM_ALL_STATS) +
 			(priv->tx_ring_num * 2) +
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 			(priv->rx_ring_num * 5);
 #else
 			(priv->rx_ring_num * 2);
@@ -276,7 +276,7 @@
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		data[index++] = priv->rx_ring[i].packets;
 		data[index++] = priv->rx_ring[i].bytes;
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 		data[index++] = priv->rx_ring[i].yields;
 		data[index++] = priv->rx_ring[i].misses;
 		data[index++] = priv->rx_ring[i].cleaned;
@@ -344,7 +344,7 @@
 				"rx%d_packets", i);
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"rx%d_bytes", i);
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"rx%d_napi_yield", i);
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 5eac871..fa37b7a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -68,7 +68,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 /* must be called with local_bh_disable()d */
 static int mlx4_en_low_latency_recv(struct napi_struct *napi)
 {
@@ -94,7 +94,7 @@
 
 	return done;
 }
-#endif	/* CONFIG_NET_LL_RX_POLL */
+#endif	/* CONFIG_NET_RX_BUSY_POLL */
 
 #ifdef CONFIG_RFS_ACCEL
 
@@ -2140,7 +2140,7 @@
 #ifdef CONFIG_RFS_ACCEL
 	.ndo_rx_flow_steer	= mlx4_en_filter_rfs,
 #endif
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	.ndo_busy_poll		= mlx4_en_low_latency_recv,
 #endif
 };
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 8873d68..6fc6dab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -845,16 +845,7 @@
 			   MLX4_CMD_NATIVE);
 
 	if (!err && dev->caps.function != slave) {
-		/* if config MAC in DB use it */
-		if (priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac)
-			def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac;
-		else {
-			/* set slave default_mac address */
-			MLX4_GET(def_mac, outbox->buf, QUERY_PORT_MAC_OFFSET);
-			def_mac += slave << 8;
-			priv->mfunc.master.vf_admin[slave].vport[vhcr->in_modifier].mac = def_mac;
-		}
-
+		def_mac = priv->mfunc.master.vf_oper[slave].vport[vhcr->in_modifier].state.mac;
 		MLX4_PUT(outbox->buf, def_mac, QUERY_PORT_MAC_OFFSET);
 
 		/* get port type - currently only eth is enabled */
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index e85af92..36be320 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -371,7 +371,7 @@
 
 	dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0;
 
-	if (!enable_64b_cqe_eqe) {
+	if (!enable_64b_cqe_eqe && !mlx4_is_slave(dev)) {
 		if (dev_cap->flags &
 		    (MLX4_DEV_CAP_FLAG_64B_CQE | MLX4_DEV_CAP_FLAG_64B_EQE)) {
 			mlx4_warn(dev, "64B EQEs/CQEs supported by the device but not enabled\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 35fb60e..5e0aa56 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -292,7 +292,7 @@
 	void *rx_info;
 	unsigned long bytes;
 	unsigned long packets;
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned long yields;
 	unsigned long misses;
 	unsigned long cleaned;
@@ -318,7 +318,7 @@
 	struct mlx4_cqe *buf;
 #define MLX4_EN_OPCODE_ERROR	0x1e
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int state;
 #define MLX4_EN_CQ_STATE_IDLE        0
 #define MLX4_EN_CQ_STATE_NAPI     1    /* NAPI owns this CQ */
@@ -329,7 +329,7 @@
 #define CQ_YIELD (MLX4_EN_CQ_STATE_NAPI_YIELD | MLX4_EN_CQ_STATE_POLL_YIELD)
 #define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD)
 	spinlock_t poll_lock; /* protects from LLS/napi conflicts */
-#endif  /* CONFIG_NET_LL_RX_POLL */
+#endif  /* CONFIG_NET_RX_BUSY_POLL */
 };
 
 struct mlx4_en_port_profile {
@@ -580,7 +580,7 @@
 	struct rcu_head rcu;
 };
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void mlx4_en_cq_init_lock(struct mlx4_en_cq *cq)
 {
 	spin_lock_init(&cq->poll_lock);
@@ -687,7 +687,7 @@
 {
 	return false;
 }
-#endif /* CONFIG_NET_LL_RX_POLL */
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 
 #define MLX4_EN_WOL_DO_MODIFY (1ULL << 63)
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 205753a..5472cbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -46,7 +46,7 @@
 #include "mlx5_core.h"
 
 enum {
-	CMD_IF_REV = 3,
+	CMD_IF_REV = 5,
 };
 
 enum {
@@ -282,6 +282,12 @@
 	case MLX5_CMD_OP_TEARDOWN_HCA:
 		return "TEARDOWN_HCA";
 
+	case MLX5_CMD_OP_ENABLE_HCA:
+		return "MLX5_CMD_OP_ENABLE_HCA";
+
+	case MLX5_CMD_OP_DISABLE_HCA:
+		return "MLX5_CMD_OP_DISABLE_HCA";
+
 	case MLX5_CMD_OP_QUERY_PAGES:
 		return "QUERY_PAGES";
 
@@ -1113,7 +1119,13 @@
 
 	for (i = 0; i < (1 << cmd->log_sz); i++) {
 		if (test_bit(i, &vector)) {
+			struct semaphore *sem;
+
 			ent = cmd->ent_arr[i];
+			if (ent->page_queue)
+				sem = &cmd->pages_sem;
+			else
+				sem = &cmd->sem;
 			ktime_get_ts(&ent->ts2);
 			memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out));
 			dump_command(dev, ent, 0);
@@ -1136,10 +1148,7 @@
 			} else {
 				complete(&ent->done);
 			}
-			if (ent->page_queue)
-				up(&cmd->pages_sem);
-			else
-				up(&cmd->sem);
+			up(sem);
 		}
 	}
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index c02cbcf..443cc4d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -268,7 +268,7 @@
 		case MLX5_EVENT_TYPE_PAGE_REQUEST:
 			{
 				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
-				s16 npages = be16_to_cpu(eqe->data.req_pages.num_pages);
+				s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);
 
 				mlx5_core_dbg(dev, "page request for func 0x%x, napges %d\n", func_id, npages);
 				mlx5_core_req_pages_handler(dev, func_id, npages);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 72a5222..f012658 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -113,7 +113,7 @@
 	caps->log_max_srq = out->hca_cap.log_max_srqs & 0x1f;
 	caps->local_ca_ack_delay = out->hca_cap.local_ca_ack_delay & 0x1f;
 	caps->log_max_mcg = out->hca_cap.log_max_mcg;
-	caps->max_qp_mcg = be16_to_cpu(out->hca_cap.max_qp_mcg);
+	caps->max_qp_mcg = be32_to_cpu(out->hca_cap.max_qp_mcg) & 0xffffff;
 	caps->max_ra_res_qp = 1 << (out->hca_cap.log_max_ra_res_qp & 0x3f);
 	caps->max_ra_req_qp = 1 << (out->hca_cap.log_max_ra_req_qp & 0x3f);
 	caps->max_srq_wqes = 1 << out->hca_cap.log_max_srq_sz;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 748f10a..3e6670c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -55,33 +55,9 @@
 };
 
 static DEFINE_SPINLOCK(health_lock);
-
 static LIST_HEAD(health_list);
 static struct work_struct health_work;
 
-static health_handler_t reg_handler;
-int mlx5_register_health_report_handler(health_handler_t handler)
-{
-	spin_lock_irq(&health_lock);
-	if (reg_handler) {
-		spin_unlock_irq(&health_lock);
-		return -EEXIST;
-	}
-	reg_handler = handler;
-	spin_unlock_irq(&health_lock);
-
-	return 0;
-}
-EXPORT_SYMBOL(mlx5_register_health_report_handler);
-
-void mlx5_unregister_health_report_handler(void)
-{
-	spin_lock_irq(&health_lock);
-	reg_handler = NULL;
-	spin_unlock_irq(&health_lock);
-}
-EXPORT_SYMBOL(mlx5_unregister_health_report_handler);
-
 static void health_care(struct work_struct *work)
 {
 	struct mlx5_core_health *health, *n;
@@ -98,11 +74,8 @@
 		priv = container_of(health, struct mlx5_priv, health);
 		dev = container_of(priv, struct mlx5_core_dev, priv);
 		mlx5_core_warn(dev, "handling bad device here\n");
+		/* nothing yet */
 		spin_lock_irq(&health_lock);
-		if (reg_handler)
-			reg_handler(dev->pdev, health->health,
-				    sizeof(health->health));
-
 		list_del_init(&health->list);
 		spin_unlock_irq(&health_lock);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 12242de..b47739b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -249,6 +249,44 @@
 	return err;
 }
 
+static int mlx5_core_enable_hca(struct mlx5_core_dev *dev)
+{
+	int err;
+	struct mlx5_enable_hca_mbox_in in;
+	struct mlx5_enable_hca_mbox_out out;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ENABLE_HCA);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	return 0;
+}
+
+static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
+{
+	int err;
+	struct mlx5_disable_hca_mbox_in in;
+	struct mlx5_disable_hca_mbox_out out;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DISABLE_HCA);
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	return 0;
+}
+
 int mlx5_dev_init(struct mlx5_core_dev *dev, struct pci_dev *pdev)
 {
 	struct mlx5_priv *priv = &dev->priv;
@@ -304,28 +342,41 @@
 	}
 
 	mlx5_pagealloc_init(dev);
+
+	err = mlx5_core_enable_hca(dev);
+	if (err) {
+		dev_err(&pdev->dev, "enable hca failed\n");
+		goto err_pagealloc_cleanup;
+	}
+
+	err = mlx5_satisfy_startup_pages(dev, 1);
+	if (err) {
+		dev_err(&pdev->dev, "failed to allocate boot pages\n");
+		goto err_disable_hca;
+	}
+
 	err = set_hca_ctrl(dev);
 	if (err) {
 		dev_err(&pdev->dev, "set_hca_ctrl failed\n");
-		goto err_pagealloc_cleanup;
+		goto reclaim_boot_pages;
 	}
 
 	err = handle_hca_cap(dev);
 	if (err) {
 		dev_err(&pdev->dev, "handle_hca_cap failed\n");
-		goto err_pagealloc_cleanup;
+		goto reclaim_boot_pages;
 	}
 
-	err = mlx5_satisfy_startup_pages(dev);
+	err = mlx5_satisfy_startup_pages(dev, 0);
 	if (err) {
-		dev_err(&pdev->dev, "failed to allocate startup pages\n");
-		goto err_pagealloc_cleanup;
+		dev_err(&pdev->dev, "failed to allocate init pages\n");
+		goto reclaim_boot_pages;
 	}
 
 	err = mlx5_pagealloc_start(dev);
 	if (err) {
 		dev_err(&pdev->dev, "mlx5_pagealloc_start failed\n");
-		goto err_reclaim_pages;
+		goto reclaim_boot_pages;
 	}
 
 	err = mlx5_cmd_init_hca(dev);
@@ -396,9 +447,12 @@
 err_pagealloc_stop:
 	mlx5_pagealloc_stop(dev);
 
-err_reclaim_pages:
+reclaim_boot_pages:
 	mlx5_reclaim_startup_pages(dev);
 
+err_disable_hca:
+	mlx5_core_disable_hca(dev);
+
 err_pagealloc_cleanup:
 	mlx5_pagealloc_cleanup(dev);
 	mlx5_cmd_cleanup(dev);
@@ -434,6 +488,7 @@
 	mlx5_cmd_teardown_hca(dev);
 	mlx5_pagealloc_stop(dev);
 	mlx5_reclaim_startup_pages(dev);
+	mlx5_core_disable_hca(dev);
 	mlx5_pagealloc_cleanup(dev);
 	mlx5_cmd_cleanup(dev);
 	iounmap(dev->iseg);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index f0bf463..3a2408d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -43,10 +43,16 @@
 	MLX5_PAGES_TAKE		= 2
 };
 
+enum {
+	MLX5_BOOT_PAGES		= 1,
+	MLX5_INIT_PAGES		= 2,
+	MLX5_POST_INIT_PAGES	= 3
+};
+
 struct mlx5_pages_req {
 	struct mlx5_core_dev *dev;
 	u32	func_id;
-	s16	npages;
+	s32	npages;
 	struct work_struct work;
 };
 
@@ -64,27 +70,23 @@
 
 struct mlx5_query_pages_outbox {
 	struct mlx5_outbox_hdr	hdr;
-	u8			reserved[2];
+	__be16			rsvd;
 	__be16			func_id;
-	__be16			init_pages;
-	__be16			num_pages;
+	__be32			num_pages;
 };
 
 struct mlx5_manage_pages_inbox {
 	struct mlx5_inbox_hdr	hdr;
-	__be16			rsvd0;
+	__be16			rsvd;
 	__be16			func_id;
-	__be16			rsvd1;
-	__be16			num_entries;
-	u8			rsvd2[16];
+	__be32			num_entries;
 	__be64			pas[0];
 };
 
 struct mlx5_manage_pages_outbox {
 	struct mlx5_outbox_hdr	hdr;
-	u8			rsvd0[2];
-	__be16			num_entries;
-	u8			rsvd1[20];
+	__be32			num_entries;
+	u8			rsvd[4];
 	__be64			pas[0];
 };
 
@@ -146,7 +148,7 @@
 }
 
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
-				s16 *pages, s16 *init_pages)
+				s32 *npages, int boot)
 {
 	struct mlx5_query_pages_inbox	in;
 	struct mlx5_query_pages_outbox	out;
@@ -155,6 +157,8 @@
 	memset(&in, 0, sizeof(in));
 	memset(&out, 0, sizeof(out));
 	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES);
+	in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES);
+
 	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
 	if (err)
 		return err;
@@ -162,10 +166,7 @@
 	if (out.hdr.status)
 		return mlx5_cmd_status_to_err(&out.hdr);
 
-	if (pages)
-		*pages = be16_to_cpu(out.num_pages);
-	if (init_pages)
-		*init_pages = be16_to_cpu(out.init_pages);
+	*npages = be32_to_cpu(out.num_pages);
 	*func_id = be16_to_cpu(out.func_id);
 
 	return err;
@@ -219,7 +220,7 @@
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
 	in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE);
 	in->func_id = cpu_to_be16(func_id);
-	in->num_entries = cpu_to_be16(npages);
+	in->num_entries = cpu_to_be32(npages);
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
 	mlx5_core_dbg(dev, "err %d\n", err);
 	if (err) {
@@ -287,7 +288,7 @@
 	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES);
 	in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE);
 	in.func_id = cpu_to_be16(func_id);
-	in.num_entries = cpu_to_be16(npages);
+	in.num_entries = cpu_to_be32(npages);
 	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
 	err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen);
 	if (err) {
@@ -301,7 +302,7 @@
 		goto out_free;
 	}
 
-	num_claimed = be16_to_cpu(out->num_entries);
+	num_claimed = be32_to_cpu(out->num_entries);
 	if (nclaimed)
 		*nclaimed = num_claimed;
 
@@ -340,7 +341,7 @@
 }
 
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-				 s16 npages)
+				 s32 npages)
 {
 	struct mlx5_pages_req *req;
 
@@ -357,19 +358,20 @@
 	queue_work(dev->priv.pg_wq, &req->work);
 }
 
-int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev)
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
 {
-	s16 uninitialized_var(init_pages);
 	u16 uninitialized_var(func_id);
+	s32 uninitialized_var(npages);
 	int err;
 
-	err = mlx5_cmd_query_pages(dev, &func_id, NULL, &init_pages);
+	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
 	if (err)
 		return err;
 
-	mlx5_core_dbg(dev, "requested %d init pages for func_id 0x%x\n", init_pages, func_id);
+	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
+		      npages, boot ? "boot" : "init", func_id);
 
-	return give_pages(dev, func_id, init_pages, 0);
+	return give_pages(dev, func_id, npages, 0);
 }
 
 static int optimal_reclaimed_pages(void)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 71d4a39..68f5d9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -164,6 +164,7 @@
 		uuari->uars[i].map = ioremap(addr, PAGE_SIZE);
 		if (!uuari->uars[i].map) {
 			mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+			err = -ENOMEM;
 			goto out_count;
 		}
 		mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n",
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
index cb22341..a588ffd 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig
@@ -4,7 +4,7 @@
 
 config PCH_GBE
 	tristate "OKI SEMICONDUCTOR IOH(ML7223/ML7831) GbE"
-	depends on PCI
+	depends on PCI && (X86 || COMPILE_TEST)
 	select MII
 	select PTP_1588_CLOCK_PCH
 	---help---
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index b00cf56..221645e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1400,8 +1400,8 @@
 #define ADDR_IN_RANGE(addr, low, high)	\
 	(((addr) < (high)) && ((addr) >= (low)))
 
-#define QLCRD32(adapter, off) \
-	(adapter->ahw->hw_ops->read_reg)(adapter, off)
+#define QLCRD32(adapter, off, err) \
+	(adapter->ahw->hw_ops->read_reg)(adapter, off, err)
 
 #define QLCWR32(adapter, off, val) \
 	adapter->ahw->hw_ops->write_reg(adapter, off, val)
@@ -1604,7 +1604,7 @@
 struct qlcnic_hardware_ops {
 	void (*read_crb) (struct qlcnic_adapter *, char *, loff_t, size_t);
 	void (*write_crb) (struct qlcnic_adapter *, char *, loff_t, size_t);
-	int (*read_reg) (struct qlcnic_adapter *, ulong);
+	int (*read_reg) (struct qlcnic_adapter *, ulong, int *);
 	int (*write_reg) (struct qlcnic_adapter *, ulong, u32);
 	void (*get_ocm_win) (struct qlcnic_hardware_context *);
 	int (*get_mac_address) (struct qlcnic_adapter *, u8 *);
@@ -1662,12 +1662,6 @@
 	adapter->ahw->hw_ops->write_crb(adapter, buf, offset, size);
 }
 
-static inline int qlcnic_hw_read_wx_2M(struct qlcnic_adapter *adapter,
-				       ulong off)
-{
-	return adapter->ahw->hw_ops->read_reg(adapter, off);
-}
-
 static inline int qlcnic_hw_write_wx_2M(struct qlcnic_adapter *adapter,
 					ulong off, u32 data)
 {
@@ -1869,7 +1863,8 @@
 
 static inline void qlcnic_set_mac_filter_count(struct qlcnic_adapter *adapter)
 {
-	adapter->ahw->hw_ops->set_mac_filter_count(adapter);
+	if (adapter->ahw->hw_ops->set_mac_filter_count)
+		adapter->ahw->hw_ops->set_mac_filter_count(adapter);
 }
 
 static inline void qlcnic_dev_request_reset(struct qlcnic_adapter *adapter,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 0913c62..9d4bb7f 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -228,17 +228,17 @@
 	return 0;
 }
 
-int qlcnic_83xx_rd_reg_indirect(struct qlcnic_adapter *adapter, ulong addr)
+int qlcnic_83xx_rd_reg_indirect(struct qlcnic_adapter *adapter, ulong addr,
+				int *err)
 {
-	int ret;
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
 
-	ret = __qlcnic_set_win_base(adapter, (u32) addr);
-	if (!ret) {
+	*err = __qlcnic_set_win_base(adapter, (u32) addr);
+	if (!*err) {
 		return QLCRDX(ahw, QLCNIC_WILDCARD);
 	} else {
 		dev_err(&adapter->pdev->dev,
-			"%s failed, addr = 0x%x\n", __func__, (int)addr);
+			"%s failed, addr = 0x%lx\n", __func__, addr);
 		return -EIO;
 	}
 }
@@ -561,7 +561,7 @@
 void qlcnic_83xx_read_crb(struct qlcnic_adapter *adapter, char *buf,
 			  loff_t offset, size_t size)
 {
-	int ret;
+	int ret = 0;
 	u32 data;
 
 	if (qlcnic_api_lock(adapter)) {
@@ -571,7 +571,7 @@
 		return;
 	}
 
-	ret = qlcnic_83xx_rd_reg_indirect(adapter, (u32) offset);
+	data = QLCRD32(adapter, (u32) offset, &ret);
 	qlcnic_api_unlock(adapter);
 
 	if (ret == -EIO) {
@@ -580,7 +580,6 @@
 			__func__, (u32)offset);
 		return;
 	}
-	data = ret;
 	memcpy(buf, &data, size);
 }
 
@@ -2075,18 +2074,25 @@
 static void qlcnic_83xx_handle_link_aen(struct qlcnic_adapter *adapter,
 					u32 data[])
 {
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
 	u8 link_status, duplex;
 	/* link speed */
 	link_status = LSB(data[3]) & 1;
-	adapter->ahw->link_speed = MSW(data[2]);
-	adapter->ahw->link_autoneg = MSB(MSW(data[3]));
-	adapter->ahw->module_type = MSB(LSW(data[3]));
-	duplex = LSB(MSW(data[3]));
-	if (duplex)
-		adapter->ahw->link_duplex = DUPLEX_FULL;
-	else
-		adapter->ahw->link_duplex = DUPLEX_HALF;
-	adapter->ahw->has_link_events = 1;
+	if (link_status) {
+		ahw->link_speed = MSW(data[2]);
+		duplex = LSB(MSW(data[3]));
+		if (duplex)
+			ahw->link_duplex = DUPLEX_FULL;
+		else
+			ahw->link_duplex = DUPLEX_HALF;
+	} else {
+		ahw->link_speed = SPEED_UNKNOWN;
+		ahw->link_duplex = DUPLEX_UNKNOWN;
+	}
+
+	ahw->link_autoneg = MSB(MSW(data[3]));
+	ahw->module_type = MSB(LSW(data[3]));
+	ahw->has_link_events = 1;
 	qlcnic_advert_link_change(adapter, link_status);
 }
 
@@ -2384,9 +2390,9 @@
 				      u32 flash_addr, u8 *p_data,
 				      int count)
 {
-	int i, ret;
-	u32 word, range, flash_offset, addr = flash_addr;
+	u32 word, range, flash_offset, addr = flash_addr, ret;
 	ulong indirect_add, direct_window;
+	int i, err = 0;
 
 	flash_offset = addr & (QLCNIC_FLASH_SECTOR_SIZE - 1);
 	if (addr & 0x3) {
@@ -2404,10 +2410,9 @@
 		/* Multi sector read */
 		for (i = 0; i < count; i++) {
 			indirect_add = QLC_83XX_FLASH_DIRECT_DATA(addr);
-			ret = qlcnic_83xx_rd_reg_indirect(adapter,
-							  indirect_add);
-			if (ret == -EIO)
-				return -EIO;
+			ret = QLCRD32(adapter, indirect_add, &err);
+			if (err == -EIO)
+				return err;
 
 			word = ret;
 			*(u32 *)p_data  = word;
@@ -2428,10 +2433,9 @@
 		/* Single sector read */
 		for (i = 0; i < count; i++) {
 			indirect_add = QLC_83XX_FLASH_DIRECT_DATA(addr);
-			ret = qlcnic_83xx_rd_reg_indirect(adapter,
-							  indirect_add);
-			if (ret == -EIO)
-				return -EIO;
+			ret = QLCRD32(adapter, indirect_add, &err);
+			if (err == -EIO)
+				return err;
 
 			word = ret;
 			*(u32 *)p_data  = word;
@@ -2447,10 +2451,13 @@
 {
 	u32 status;
 	int retries = QLC_83XX_FLASH_READ_RETRY_COUNT;
+	int err = 0;
 
 	do {
-		status = qlcnic_83xx_rd_reg_indirect(adapter,
-						     QLC_83XX_FLASH_STATUS);
+		status = QLCRD32(adapter, QLC_83XX_FLASH_STATUS, &err);
+		if (err == -EIO)
+			return err;
+
 		if ((status & QLC_83XX_FLASH_STATUS_READY) ==
 		    QLC_83XX_FLASH_STATUS_READY)
 			break;
@@ -2502,7 +2509,8 @@
 
 int qlcnic_83xx_read_flash_mfg_id(struct qlcnic_adapter *adapter)
 {
-	int ret, mfg_id;
+	int ret, err = 0;
+	u32 mfg_id;
 
 	if (qlcnic_83xx_lock_flash(adapter))
 		return -EIO;
@@ -2517,9 +2525,11 @@
 		return -EIO;
 	}
 
-	mfg_id = qlcnic_83xx_rd_reg_indirect(adapter, QLC_83XX_FLASH_RDDATA);
-	if (mfg_id == -EIO)
-		return -EIO;
+	mfg_id = QLCRD32(adapter, QLC_83XX_FLASH_RDDATA, &err);
+	if (err == -EIO) {
+		qlcnic_83xx_unlock_flash(adapter);
+		return err;
+	}
 
 	adapter->flash_mfg_id = (mfg_id & 0xFF);
 	qlcnic_83xx_unlock_flash(adapter);
@@ -2636,7 +2646,7 @@
 				 u32 *p_data, int count)
 {
 	u32 temp;
-	int ret = -EIO;
+	int ret = -EIO, err = 0;
 
 	if ((count < QLC_83XX_FLASH_WRITE_MIN) ||
 	    (count > QLC_83XX_FLASH_WRITE_MAX)) {
@@ -2645,8 +2655,10 @@
 		return -EIO;
 	}
 
-	temp = qlcnic_83xx_rd_reg_indirect(adapter,
-					   QLC_83XX_FLASH_SPI_CONTROL);
+	temp = QLCRD32(adapter, QLC_83XX_FLASH_SPI_CONTROL, &err);
+	if (err == -EIO)
+		return err;
+
 	qlcnic_83xx_wrt_reg_indirect(adapter, QLC_83XX_FLASH_SPI_CONTROL,
 				     (temp | QLC_83XX_FLASH_SPI_CTRL));
 	qlcnic_83xx_wrt_reg_indirect(adapter, QLC_83XX_FLASH_ADDR,
@@ -2695,13 +2707,18 @@
 		return -EIO;
 	}
 
-	ret = qlcnic_83xx_rd_reg_indirect(adapter, QLC_83XX_FLASH_SPI_STATUS);
+	ret = QLCRD32(adapter, QLC_83XX_FLASH_SPI_STATUS, &err);
+	if (err == -EIO)
+		return err;
+
 	if ((ret & QLC_83XX_FLASH_SPI_CTRL) == QLC_83XX_FLASH_SPI_CTRL) {
 		dev_err(&adapter->pdev->dev, "%s: failed at %d\n",
 			__func__, __LINE__);
 		/* Operation failed, clear error bit */
-		temp = qlcnic_83xx_rd_reg_indirect(adapter,
-						   QLC_83XX_FLASH_SPI_CONTROL);
+		temp = QLCRD32(adapter, QLC_83XX_FLASH_SPI_CONTROL, &err);
+		if (err == -EIO)
+			return err;
+
 		qlcnic_83xx_wrt_reg_indirect(adapter,
 					     QLC_83XX_FLASH_SPI_CONTROL,
 					     (temp | QLC_83XX_FLASH_SPI_CTRL));
@@ -2823,6 +2840,7 @@
 {
 	int i, j, ret = 0;
 	u32 temp;
+	int err = 0;
 
 	/* Check alignment */
 	if (addr & 0xF)
@@ -2855,8 +2873,12 @@
 					     QLCNIC_TA_WRITE_START);
 
 		for (j = 0; j < MAX_CTL_CHECK; j++) {
-			temp = qlcnic_83xx_rd_reg_indirect(adapter,
-							   QLCNIC_MS_CTRL);
+			temp = QLCRD32(adapter, QLCNIC_MS_CTRL, &err);
+			if (err == -EIO) {
+				mutex_unlock(&adapter->ahw->mem_lock);
+				return err;
+			}
+
 			if ((temp & TA_CTL_BUSY) == 0)
 				break;
 		}
@@ -2878,9 +2900,9 @@
 int qlcnic_83xx_flash_read32(struct qlcnic_adapter *adapter, u32 flash_addr,
 			     u8 *p_data, int count)
 {
-	int i, ret;
-	u32 word, addr = flash_addr;
+	u32 word, addr = flash_addr, ret;
 	ulong  indirect_addr;
+	int i, err = 0;
 
 	if (qlcnic_83xx_lock_flash(adapter) != 0)
 		return -EIO;
@@ -2900,10 +2922,10 @@
 		}
 
 		indirect_addr = QLC_83XX_FLASH_DIRECT_DATA(addr);
-		ret = qlcnic_83xx_rd_reg_indirect(adapter,
-						  indirect_addr);
-		if (ret == -EIO)
-			return -EIO;
+		ret = QLCRD32(adapter, indirect_addr, &err);
+		if (err == -EIO)
+			return err;
+
 		word = ret;
 		*(u32 *)p_data  = word;
 		p_data = p_data + 4;
@@ -3014,8 +3036,8 @@
 	}
 
 	if (ahw->port_type == QLCNIC_XGBE) {
-		ecmd->supported = SUPPORTED_1000baseT_Full;
-		ecmd->advertising = ADVERTISED_1000baseT_Full;
+		ecmd->supported = SUPPORTED_10000baseT_Full;
+		ecmd->advertising = ADVERTISED_10000baseT_Full;
 	} else {
 		ecmd->supported = (SUPPORTED_10baseT_Half |
 				   SUPPORTED_10baseT_Full |
@@ -3244,6 +3266,11 @@
 	u8 val;
 	int ret, max_sds_rings = adapter->max_sds_rings;
 
+	if (test_bit(__QLCNIC_RESETTING, &adapter->state)) {
+		netdev_info(netdev, "Device is resetting\n");
+		return -EBUSY;
+	}
+
 	if (qlcnic_get_diag_lock(adapter)) {
 		netdev_info(netdev, "Device in diagnostics mode\n");
 		return -EBUSY;
@@ -3369,7 +3396,8 @@
 
 static int qlcnic_83xx_read_flash_status_reg(struct qlcnic_adapter *adapter)
 {
-	int ret;
+	int ret, err = 0;
+	u32 temp;
 
 	qlcnic_83xx_wrt_reg_indirect(adapter, QLC_83XX_FLASH_ADDR,
 				     QLC_83XX_FLASH_OEM_READ_SIG);
@@ -3379,8 +3407,11 @@
 	if (ret)
 		return -EIO;
 
-	ret = qlcnic_83xx_rd_reg_indirect(adapter, QLC_83XX_FLASH_RDDATA);
-	return ret & 0xFF;
+	temp = QLCRD32(adapter, QLC_83XX_FLASH_RDDATA, &err);
+	if (err == -EIO)
+		return err;
+
+	return temp & 0xFF;
 }
 
 int qlcnic_83xx_flash_test(struct qlcnic_adapter *adapter)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
index 2548d14..272f56a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.h
@@ -508,7 +508,7 @@
 void qlcnic_83xx_remove_sysfs(struct qlcnic_adapter *);
 void qlcnic_83xx_write_crb(struct qlcnic_adapter *, char *, loff_t, size_t);
 void qlcnic_83xx_read_crb(struct qlcnic_adapter *, char *, loff_t, size_t);
-int qlcnic_83xx_rd_reg_indirect(struct qlcnic_adapter *, ulong);
+int qlcnic_83xx_rd_reg_indirect(struct qlcnic_adapter *, ulong, int *);
 int qlcnic_83xx_wrt_reg_indirect(struct qlcnic_adapter *, ulong, u32);
 void qlcnic_83xx_process_rcv_diag(struct qlcnic_adapter *, int, u64 []);
 int qlcnic_83xx_nic_set_promisc(struct qlcnic_adapter *, u32);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
index f41dfab..345d987 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
@@ -629,7 +629,8 @@
 		return -EIO;
 	}
 
-	qlcnic_set_drv_version(adapter);
+	if (adapter->portnum == 0)
+		qlcnic_set_drv_version(adapter);
 	qlcnic_83xx_idc_attach_driver(adapter);
 
 	return 0;
@@ -1303,8 +1304,11 @@
 {
 	int i, j;
 	u32 val = 0, val1 = 0, reg = 0;
+	int err = 0;
 
-	val = QLCRD32(adapter, QLC_83XX_SRE_SHIM_REG);
+	val = QLCRD32(adapter, QLC_83XX_SRE_SHIM_REG, &err);
+	if (err == -EIO)
+		return;
 	dev_info(&adapter->pdev->dev, "SRE-Shim Ctrl:0x%x\n", val);
 
 	for (j = 0; j < 2; j++) {
@@ -1318,7 +1322,9 @@
 			reg = QLC_83XX_PORT1_THRESHOLD;
 		}
 		for (i = 0; i < 8; i++) {
-			val = QLCRD32(adapter, reg + (i * 0x4));
+			val = QLCRD32(adapter, reg + (i * 0x4), &err);
+			if (err == -EIO)
+				return;
 			dev_info(&adapter->pdev->dev, "0x%x  ", val);
 		}
 		dev_info(&adapter->pdev->dev, "\n");
@@ -1335,8 +1341,10 @@
 			reg = QLC_83XX_PORT1_TC_MC_REG;
 		}
 		for (i = 0; i < 4; i++) {
-			val = QLCRD32(adapter, reg + (i * 0x4));
-			 dev_info(&adapter->pdev->dev, "0x%x  ", val);
+			val = QLCRD32(adapter, reg + (i * 0x4), &err);
+			if (err == -EIO)
+				return;
+			dev_info(&adapter->pdev->dev, "0x%x  ", val);
 		}
 		dev_info(&adapter->pdev->dev, "\n");
 	}
@@ -1352,17 +1360,25 @@
 			reg = QLC_83XX_PORT1_TC_STATS;
 		}
 		for (i = 7; i >= 0; i--) {
-			val = QLCRD32(adapter, reg);
+			val = QLCRD32(adapter, reg, &err);
+			if (err == -EIO)
+				return;
 			val &= ~(0x7 << 29);    /* Reset bits 29 to 31 */
 			QLCWR32(adapter, reg, (val | (i << 29)));
-			val = QLCRD32(adapter, reg);
+			val = QLCRD32(adapter, reg, &err);
+			if (err == -EIO)
+				return;
 			dev_info(&adapter->pdev->dev, "0x%x  ", val);
 		}
 		dev_info(&adapter->pdev->dev, "\n");
 	}
 
-	val = QLCRD32(adapter, QLC_83XX_PORT2_IFB_THRESHOLD);
-	val1 = QLCRD32(adapter, QLC_83XX_PORT3_IFB_THRESHOLD);
+	val = QLCRD32(adapter, QLC_83XX_PORT2_IFB_THRESHOLD, &err);
+	if (err == -EIO)
+		return;
+	val1 = QLCRD32(adapter, QLC_83XX_PORT3_IFB_THRESHOLD, &err);
+	if (err == -EIO)
+		return;
 	dev_info(&adapter->pdev->dev,
 		 "IFB-Pause Thresholds: Port 2:0x%x, Port 3:0x%x\n",
 		 val, val1);
@@ -1425,7 +1441,7 @@
 static int qlcnic_83xx_check_heartbeat(struct qlcnic_adapter *p_dev)
 {
 	u32 heartbeat, peg_status;
-	int retries, ret = -EIO;
+	int retries, ret = -EIO, err = 0;
 
 	retries = QLCNIC_HEARTBEAT_CHECK_RETRY_COUNT;
 	p_dev->heartbeat = QLC_SHARED_REG_RD32(p_dev,
@@ -1453,11 +1469,11 @@
 			 "PEG_NET_2_PC: 0x%x, PEG_NET_3_PC: 0x%x,\n"
 			 "PEG_NET_4_PC: 0x%x\n", peg_status,
 			 QLC_SHARED_REG_RD32(p_dev, QLCNIC_PEG_HALT_STATUS2),
-			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_0),
-			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_1),
-			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_2),
-			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_3),
-			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_4));
+			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_0, &err),
+			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_1, &err),
+			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_2, &err),
+			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_3, &err),
+			 QLCRD32(p_dev, QLC_83XX_CRB_PEG_NET_4, &err));
 
 		if (QLCNIC_FWERROR_CODE(peg_status) == 0x67)
 			dev_err(&p_dev->pdev->dev,
@@ -1501,18 +1517,22 @@
 static int qlcnic_83xx_poll_reg(struct qlcnic_adapter *p_dev, u32 addr,
 				int duration, u32 mask, u32 status)
 {
+	int timeout_error, err = 0;
 	u32 value;
-	int timeout_error;
 	u8 retries;
 
-	value = qlcnic_83xx_rd_reg_indirect(p_dev, addr);
+	value = QLCRD32(p_dev, addr, &err);
+	if (err == -EIO)
+		return err;
 	retries = duration / 10;
 
 	do {
 		if ((value & mask) != status) {
 			timeout_error = 1;
 			msleep(duration / 10);
-			value = qlcnic_83xx_rd_reg_indirect(p_dev, addr);
+			value = QLCRD32(p_dev, addr, &err);
+			if (err == -EIO)
+				return err;
 		} else {
 			timeout_error = 0;
 			break;
@@ -1606,9 +1626,12 @@
 static void qlcnic_83xx_read_write_crb_reg(struct qlcnic_adapter *p_dev,
 					   u32 raddr, u32 waddr)
 {
-	int value;
+	int err = 0;
+	u32 value;
 
-	value = qlcnic_83xx_rd_reg_indirect(p_dev, raddr);
+	value = QLCRD32(p_dev, raddr, &err);
+	if (err == -EIO)
+		return;
 	qlcnic_83xx_wrt_reg_indirect(p_dev, waddr, value);
 }
 
@@ -1617,12 +1640,16 @@
 				    u32 raddr, u32 waddr,
 				    struct qlc_83xx_rmw *p_rmw_hdr)
 {
-	int value;
+	int err = 0;
+	u32 value;
 
-	if (p_rmw_hdr->index_a)
+	if (p_rmw_hdr->index_a) {
 		value = p_dev->ahw->reset.array[p_rmw_hdr->index_a];
-	else
-		value = qlcnic_83xx_rd_reg_indirect(p_dev, raddr);
+	} else {
+		value = QLCRD32(p_dev, raddr, &err);
+		if (err == -EIO)
+			return;
+	}
 
 	value &= p_rmw_hdr->mask;
 	value <<= p_rmw_hdr->shl;
@@ -1675,7 +1702,7 @@
 	long delay;
 	struct qlc_83xx_entry *entry;
 	struct qlc_83xx_poll *poll;
-	int i;
+	int i, err = 0;
 	unsigned long arg1, arg2;
 
 	poll = (struct qlc_83xx_poll *)((char *)p_hdr +
@@ -1699,10 +1726,12 @@
 							 arg1, delay,
 							 poll->mask,
 							 poll->status)){
-					qlcnic_83xx_rd_reg_indirect(p_dev,
-								    arg1);
-					qlcnic_83xx_rd_reg_indirect(p_dev,
-								    arg2);
+					QLCRD32(p_dev, arg1, &err);
+					if (err == -EIO)
+						return;
+					QLCRD32(p_dev, arg2, &err);
+					if (err == -EIO)
+						return;
 				}
 			}
 		}
@@ -1768,7 +1797,7 @@
 				       struct qlc_83xx_entry_hdr *p_hdr)
 {
 	long delay;
-	int index, i, j;
+	int index, i, j, err;
 	struct qlc_83xx_quad_entry *entry;
 	struct qlc_83xx_poll *poll;
 	unsigned long addr;
@@ -1788,7 +1817,10 @@
 						  poll->mask, poll->status)){
 				index = p_dev->ahw->reset.array_index;
 				addr = entry->dr_addr;
-				j = qlcnic_83xx_rd_reg_indirect(p_dev, addr);
+				j = QLCRD32(p_dev, addr, &err);
+				if (err == -EIO)
+					return;
+
 				p_dev->ahw->reset.array[index++] = j;
 
 				if (index == QLC_83XX_MAX_RESET_SEQ_ENTRIES)
@@ -2123,6 +2155,8 @@
 	set_bit(QLC_83XX_MBX_READY, &adapter->ahw->idc.status);
 	qlcnic_83xx_clear_function_resources(adapter);
 
+	INIT_DELAYED_WORK(&adapter->idc_aen_work, qlcnic_83xx_idc_aen_work);
+
 	/* register for NIC IDC AEN Events */
 	qlcnic_83xx_register_nic_idc_func(adapter, 1);
 
@@ -2140,8 +2174,6 @@
 	if (adapter->nic_ops->init_driver(adapter))
 		return -EIO;
 
-	INIT_DELAYED_WORK(&adapter->idc_aen_work, qlcnic_83xx_idc_aen_work);
-
 	/* Periodically monitor device status */
 	qlcnic_83xx_idc_poll_dev_state(&adapter->fw_work.work);
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index 0581a48..d09389b 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -104,7 +104,7 @@
 qlcnic_poll_rsp(struct qlcnic_adapter *adapter)
 {
 	u32 rsp;
-	int timeout = 0;
+	int timeout = 0, err = 0;
 
 	do {
 		/* give atleast 1ms for firmware to respond */
@@ -113,7 +113,7 @@
 		if (++timeout > QLCNIC_OS_CRB_RETRY_COUNT)
 			return QLCNIC_CDRP_RSP_TIMEOUT;
 
-		rsp = QLCRD32(adapter, QLCNIC_CDRP_CRB_OFFSET);
+		rsp = QLCRD32(adapter, QLCNIC_CDRP_CRB_OFFSET, &err);
 	} while (!QLCNIC_CDRP_IS_RSP(rsp));
 
 	return rsp;
@@ -122,7 +122,7 @@
 int qlcnic_82xx_issue_cmd(struct qlcnic_adapter *adapter,
 			  struct qlcnic_cmd_args *cmd)
 {
-	int i;
+	int i, err = 0;
 	u32 rsp;
 	u32 signature;
 	struct pci_dev *pdev = adapter->pdev;
@@ -148,7 +148,7 @@
 		dev_err(&pdev->dev, "card response timeout.\n");
 		cmd->rsp.arg[0] = QLCNIC_RCODE_TIMEOUT;
 	} else if (rsp == QLCNIC_CDRP_RSP_FAIL) {
-		cmd->rsp.arg[0] = QLCRD32(adapter, QLCNIC_CDRP_ARG(1));
+		cmd->rsp.arg[0] = QLCRD32(adapter, QLCNIC_CDRP_ARG(1), &err);
 		switch (cmd->rsp.arg[0]) {
 		case QLCNIC_RCODE_INVALID_ARGS:
 			fmt = "CDRP invalid args: [%d]\n";
@@ -175,7 +175,7 @@
 		cmd->rsp.arg[0] = QLCNIC_RCODE_SUCCESS;
 
 	for (i = 1; i < cmd->rsp.num; i++)
-		cmd->rsp.arg[i] = QLCRD32(adapter, QLCNIC_CDRP_ARG(i));
+		cmd->rsp.arg[i] = QLCRD32(adapter, QLCNIC_CDRP_ARG(i), &err);
 
 	/* Release semaphore */
 	qlcnic_api_unlock(adapter);
@@ -210,10 +210,10 @@
 	if (err) {
 		dev_info(&adapter->pdev->dev,
 			 "Failed to set driver version in firmware\n");
-		return -EIO;
+		err = -EIO;
 	}
-
-	return 0;
+	qlcnic_free_mbx_args(&cmd);
+	return err;
 }
 
 int
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 700a463..7aac23a 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -150,6 +150,7 @@
 	"Link_Test_on_offline",
 	"Interrupt_Test_offline",
 	"Internal_Loopback_offline",
+	"External_Loopback_offline",
 	"EEPROM_Test_offline"
 };
 
@@ -266,7 +267,7 @@
 {
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
 	u32 speed, reg;
-	int check_sfp_module = 0;
+	int check_sfp_module = 0, err = 0;
 	u16 pcifn = ahw->pci_func;
 
 	/* read which mode */
@@ -289,7 +290,7 @@
 
 	} else if (adapter->ahw->port_type == QLCNIC_XGBE) {
 		u32 val = 0;
-		val = QLCRD32(adapter, QLCNIC_PORT_MODE_ADDR);
+		val = QLCRD32(adapter, QLCNIC_PORT_MODE_ADDR, &err);
 
 		if (val == QLCNIC_PORT_MODE_802_3_AP) {
 			ecmd->supported = SUPPORTED_1000baseT_Full;
@@ -300,9 +301,13 @@
 		}
 
 		if (netif_running(adapter->netdev) && ahw->has_link_events) {
-			reg = QLCRD32(adapter, P3P_LINK_SPEED_REG(pcifn));
-			speed = P3P_LINK_SPEED_VAL(pcifn, reg);
-			ahw->link_speed = speed * P3P_LINK_SPEED_MHZ;
+			if (ahw->linkup) {
+				reg = QLCRD32(adapter,
+					      P3P_LINK_SPEED_REG(pcifn), &err);
+				speed = P3P_LINK_SPEED_VAL(pcifn, reg);
+				ahw->link_speed = speed * P3P_LINK_SPEED_MHZ;
+			}
+
 			ethtool_cmd_speed_set(ecmd, ahw->link_speed);
 			ecmd->autoneg = ahw->link_autoneg;
 			ecmd->duplex = ahw->link_duplex;
@@ -463,13 +468,14 @@
 static int qlcnic_82xx_get_registers(struct qlcnic_adapter *adapter,
 				     u32 *regs_buff)
 {
-	int i, j = 0;
+	int i, j = 0, err = 0;
 
 	for (i = QLCNIC_DEV_INFO_SIZE + 1; diag_registers[j] != -1; j++, i++)
 		regs_buff[i] = QLC_SHARED_REG_RD32(adapter, diag_registers[j]);
 	j = 0;
 	while (ext_diag_registers[j] != -1)
-		regs_buff[i++] = QLCRD32(adapter, ext_diag_registers[j++]);
+		regs_buff[i++] = QLCRD32(adapter, ext_diag_registers[j++],
+					 &err);
 	return i;
 }
 
@@ -519,13 +525,16 @@
 static u32 qlcnic_test_link(struct net_device *dev)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
+	int err = 0;
 	u32 val;
 
 	if (qlcnic_83xx_check(adapter)) {
 		val = qlcnic_83xx_test_link(adapter);
 		return (val & 1) ? 0 : 1;
 	}
-	val = QLCRD32(adapter, CRB_XG_STATE_P3P);
+	val = QLCRD32(adapter, CRB_XG_STATE_P3P, &err);
+	if (err == -EIO)
+		return err;
 	val = XG_LINK_STATE_P3P(adapter->ahw->pci_func, val);
 	return (val == XG_LINK_UP_P3P) ? 0 : 1;
 }
@@ -658,6 +667,7 @@
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int port = adapter->ahw->physical_port;
+	int err = 0;
 	__u32 val;
 
 	if (qlcnic_83xx_check(adapter)) {
@@ -668,9 +678,13 @@
 		if ((port < 0) || (port > QLCNIC_NIU_MAX_GBE_PORTS))
 			return;
 		/* get flow control settings */
-		val = QLCRD32(adapter, QLCNIC_NIU_GB_MAC_CONFIG_0(port));
+		val = QLCRD32(adapter, QLCNIC_NIU_GB_MAC_CONFIG_0(port), &err);
+		if (err == -EIO)
+			return;
 		pause->rx_pause = qlcnic_gb_get_rx_flowctl(val);
-		val = QLCRD32(adapter, QLCNIC_NIU_GB_PAUSE_CTL);
+		val = QLCRD32(adapter, QLCNIC_NIU_GB_PAUSE_CTL, &err);
+		if (err == -EIO)
+			return;
 		switch (port) {
 		case 0:
 			pause->tx_pause = !(qlcnic_gb_get_gb0_mask(val));
@@ -690,7 +704,9 @@
 		if ((port < 0) || (port > QLCNIC_NIU_MAX_XG_PORTS))
 			return;
 		pause->rx_pause = 1;
-		val = QLCRD32(adapter, QLCNIC_NIU_XG_PAUSE_CTL);
+		val = QLCRD32(adapter, QLCNIC_NIU_XG_PAUSE_CTL, &err);
+		if (err == -EIO)
+			return;
 		if (port == 0)
 			pause->tx_pause = !(qlcnic_xg_get_xg0_mask(val));
 		else
@@ -707,6 +723,7 @@
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int port = adapter->ahw->physical_port;
+	int err = 0;
 	__u32 val;
 
 	if (qlcnic_83xx_check(adapter))
@@ -717,7 +734,9 @@
 		if ((port < 0) || (port > QLCNIC_NIU_MAX_GBE_PORTS))
 			return -EIO;
 		/* set flow control */
-		val = QLCRD32(adapter, QLCNIC_NIU_GB_MAC_CONFIG_0(port));
+		val = QLCRD32(adapter, QLCNIC_NIU_GB_MAC_CONFIG_0(port), &err);
+		if (err == -EIO)
+			return err;
 
 		if (pause->rx_pause)
 			qlcnic_gb_rx_flowctl(val);
@@ -728,7 +747,9 @@
 				val);
 		QLCWR32(adapter, QLCNIC_NIU_GB_MAC_CONFIG_0(port), val);
 		/* set autoneg */
-		val = QLCRD32(adapter, QLCNIC_NIU_GB_PAUSE_CTL);
+		val = QLCRD32(adapter, QLCNIC_NIU_GB_PAUSE_CTL, &err);
+		if (err == -EIO)
+			return err;
 		switch (port) {
 		case 0:
 			if (pause->tx_pause)
@@ -764,7 +785,9 @@
 		if ((port < 0) || (port > QLCNIC_NIU_MAX_XG_PORTS))
 			return -EIO;
 
-		val = QLCRD32(adapter, QLCNIC_NIU_XG_PAUSE_CTL);
+		val = QLCRD32(adapter, QLCNIC_NIU_XG_PAUSE_CTL, &err);
+		if (err == -EIO)
+			return err;
 		if (port == 0) {
 			if (pause->tx_pause)
 				qlcnic_xg_unset_xg0_mask(val);
@@ -788,11 +811,14 @@
 {
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
 	u32 data_read;
+	int err = 0;
 
 	if (qlcnic_83xx_check(adapter))
 		return qlcnic_83xx_reg_test(adapter);
 
-	data_read = QLCRD32(adapter, QLCNIC_PCIX_PH_REG(0));
+	data_read = QLCRD32(adapter, QLCNIC_PCIX_PH_REG(0), &err);
+	if (err == -EIO)
+		return err;
 	if ((data_read & 0xffff) != adapter->pdev->vendor)
 		return 1;
 
@@ -1026,8 +1052,15 @@
 		if (data[3])
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
-		data[4] = qlcnic_eeprom_test(dev);
-		if (data[4])
+		if (eth_test->flags & ETH_TEST_FL_EXTERNAL_LB) {
+			data[4] = qlcnic_loopback_test(dev, QLCNIC_ELB_MODE);
+			if (data[4])
+				eth_test->flags |= ETH_TEST_FL_FAILED;
+			eth_test->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE;
+		}
+
+		data[5] = qlcnic_eeprom_test(dev);
+		if (data[5])
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 	}
 }
@@ -1257,17 +1290,20 @@
 {
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
 	u32 wol_cfg;
+	int err = 0;
 
 	if (qlcnic_83xx_check(adapter))
 		return;
 	wol->supported = 0;
 	wol->wolopts = 0;
 
-	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG_NV);
+	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG_NV, &err);
+	if (err == -EIO)
+		return;
 	if (wol_cfg & (1UL << adapter->portnum))
 		wol->supported |= WAKE_MAGIC;
 
-	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG);
+	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG, &err);
 	if (wol_cfg & (1UL << adapter->portnum))
 		wol->wolopts |= WAKE_MAGIC;
 }
@@ -1277,17 +1313,22 @@
 {
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
 	u32 wol_cfg;
+	int err = 0;
 
 	if (qlcnic_83xx_check(adapter))
 		return -EOPNOTSUPP;
 	if (wol->wolopts & ~WAKE_MAGIC)
 		return -EINVAL;
 
-	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG_NV);
+	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG_NV, &err);
+	if (err == -EIO)
+		return err;
 	if (!(wol_cfg & (1 << adapter->portnum)))
 		return -EOPNOTSUPP;
 
-	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG);
+	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG, &err);
+	if (err == -EIO)
+		return err;
 	if (wol->wolopts & WAKE_MAGIC)
 		wol_cfg |= 1UL << adapter->portnum;
 	else
@@ -1540,7 +1581,7 @@
 		return 0;
 	case QLCNIC_SET_QUIESCENT:
 	case QLCNIC_RESET_QUIESCENT:
-		state = QLCRD32(adapter, QLCNIC_CRB_DEV_STATE);
+		state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE);
 		if (state == QLCNIC_DEV_FAILED || (state == QLCNIC_DEV_BADBAD))
 			netdev_info(netdev, "Device in FAILED state\n");
 		return 0;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 5b5d2ed..4d5f59b 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -317,16 +317,20 @@
 int
 qlcnic_pcie_sem_lock(struct qlcnic_adapter *adapter, int sem, u32 id_reg)
 {
-	int done = 0, timeout = 0;
+	int timeout = 0;
+	int err = 0;
+	u32 done = 0;
 
 	while (!done) {
-		done = QLCRD32(adapter, QLCNIC_PCIE_REG(PCIE_SEM_LOCK(sem)));
+		done = QLCRD32(adapter, QLCNIC_PCIE_REG(PCIE_SEM_LOCK(sem)),
+			       &err);
 		if (done == 1)
 			break;
 		if (++timeout >= QLCNIC_PCIE_SEM_TIMEOUT) {
 			dev_err(&adapter->pdev->dev,
 				"Failed to acquire sem=%d lock; holdby=%d\n",
-				sem, id_reg ? QLCRD32(adapter, id_reg) : -1);
+				sem,
+				id_reg ? QLCRD32(adapter, id_reg, &err) : -1);
 			return -EIO;
 		}
 		msleep(1);
@@ -341,19 +345,22 @@
 void
 qlcnic_pcie_sem_unlock(struct qlcnic_adapter *adapter, int sem)
 {
-	QLCRD32(adapter, QLCNIC_PCIE_REG(PCIE_SEM_UNLOCK(sem)));
+	int err = 0;
+
+	QLCRD32(adapter, QLCNIC_PCIE_REG(PCIE_SEM_UNLOCK(sem)), &err);
 }
 
 int qlcnic_ind_rd(struct qlcnic_adapter *adapter, u32 addr)
 {
+	int err = 0;
 	u32 data;
 
 	if (qlcnic_82xx_check(adapter))
 		qlcnic_read_window_reg(addr, adapter->ahw->pci_base0, &data);
 	else {
-		data = qlcnic_83xx_rd_reg_indirect(adapter, addr);
-		if (data == -EIO)
-			return -EIO;
+		data = QLCRD32(adapter, addr, &err);
+		if (err == -EIO)
+			return err;
 	}
 	return data;
 }
@@ -516,20 +523,18 @@
 	if (netdev->flags & IFF_PROMISC) {
 		if (!(adapter->flags & QLCNIC_PROMISC_DISABLED))
 			mode = VPORT_MISS_MODE_ACCEPT_ALL;
-	} else if (netdev->flags & IFF_ALLMULTI) {
-		if (netdev_mc_count(netdev) > ahw->max_mc_count) {
-			mode = VPORT_MISS_MODE_ACCEPT_MULTI;
-		} else if (!netdev_mc_empty(netdev) &&
-			   !qlcnic_sriov_vf_check(adapter)) {
-				netdev_for_each_mc_addr(ha, netdev)
-					qlcnic_nic_add_mac(adapter, ha->addr,
-							   vlan);
-		}
-		if (mode != VPORT_MISS_MODE_ACCEPT_MULTI &&
-		    qlcnic_sriov_vf_check(adapter))
-			qlcnic_vf_add_mc_list(netdev, vlan);
+	} else if ((netdev->flags & IFF_ALLMULTI) ||
+		   (netdev_mc_count(netdev) > ahw->max_mc_count)) {
+		mode = VPORT_MISS_MODE_ACCEPT_MULTI;
+	} else if (!netdev_mc_empty(netdev) &&
+		   !qlcnic_sriov_vf_check(adapter)) {
+		netdev_for_each_mc_addr(ha, netdev)
+			qlcnic_nic_add_mac(adapter, ha->addr, vlan);
 	}
 
+	if (qlcnic_sriov_vf_check(adapter))
+		qlcnic_vf_add_mc_list(netdev, vlan);
+
 	/* configure unicast MAC address, if there is not sufficient space
 	 * to store all the unicast addresses then enable promiscuous mode
 	 */
@@ -1161,7 +1166,8 @@
 	return -EIO;
 }
 
-int qlcnic_82xx_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong off)
+int qlcnic_82xx_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong off,
+			      int *err)
 {
 	unsigned long flags;
 	int rv;
@@ -1417,7 +1423,7 @@
 
 int qlcnic_82xx_get_board_info(struct qlcnic_adapter *adapter)
 {
-	int offset, board_type, magic;
+	int offset, board_type, magic, err = 0;
 	struct pci_dev *pdev = adapter->pdev;
 
 	offset = QLCNIC_FW_MAGIC_OFFSET;
@@ -1437,7 +1443,9 @@
 	adapter->ahw->board_type = board_type;
 
 	if (board_type == QLCNIC_BRDTYPE_P3P_4_GB_MM) {
-		u32 gpio = QLCRD32(adapter, QLCNIC_ROMUSB_GLB_PAD_GPIO_I);
+		u32 gpio = QLCRD32(adapter, QLCNIC_ROMUSB_GLB_PAD_GPIO_I, &err);
+		if (err == -EIO)
+			return err;
 		if ((gpio & 0x8000) == 0)
 			board_type = QLCNIC_BRDTYPE_P3P_10G_TP;
 	}
@@ -1477,10 +1485,13 @@
 qlcnic_wol_supported(struct qlcnic_adapter *adapter)
 {
 	u32 wol_cfg;
+	int err = 0;
 
-	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG_NV);
+	wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG_NV, &err);
 	if (wol_cfg & (1UL << adapter->portnum)) {
-		wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG);
+		wol_cfg = QLCRD32(adapter, QLCNIC_WOL_CONFIG, &err);
+		if (err == -EIO)
+			return err;
 		if (wol_cfg & (1 << adapter->portnum))
 			return 1;
 	}
@@ -1541,6 +1552,7 @@
 void qlcnic_82xx_read_crb(struct qlcnic_adapter *adapter, char *buf,
 			  loff_t offset, size_t size)
 {
+	int err = 0;
 	u32 data;
 	u64 qmdata;
 
@@ -1548,7 +1560,7 @@
 		qlcnic_pci_camqm_read_2M(adapter, offset, &qmdata);
 		memcpy(buf, &qmdata, size);
 	} else {
-		data = QLCRD32(adapter, offset);
+		data = QLCRD32(adapter, offset, &err);
 		memcpy(buf, &data, size);
 	}
 }
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
index 2c22504..4a71b28 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.h
@@ -154,7 +154,7 @@
 struct qlcnic_adapter;
 
 int qlcnic_82xx_start_firmware(struct qlcnic_adapter *);
-int qlcnic_82xx_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong);
+int qlcnic_82xx_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong, int *);
 int qlcnic_82xx_hw_write_wx_2M(struct qlcnic_adapter *, ulong, u32);
 int qlcnic_82xx_config_hw_lro(struct qlcnic_adapter *adapter, int);
 int qlcnic_82xx_nic_set_promisc(struct qlcnic_adapter *adapter, u32);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
index d28336f..974d626 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
@@ -142,7 +142,7 @@
 					 buffrag->length, PCI_DMA_TODEVICE);
 			buffrag->dma = 0ULL;
 		}
-		for (j = 0; j < cmd_buf->frag_count; j++) {
+		for (j = 1; j < cmd_buf->frag_count; j++) {
 			buffrag++;
 			if (buffrag->dma) {
 				pci_unmap_page(adapter->pdev, buffrag->dma,
@@ -286,10 +286,11 @@
 {
 	long timeout = 0;
 	long done = 0;
+	int err = 0;
 
 	cond_resched();
 	while (done == 0) {
-		done = QLCRD32(adapter, QLCNIC_ROMUSB_GLB_STATUS);
+		done = QLCRD32(adapter, QLCNIC_ROMUSB_GLB_STATUS, &err);
 		done &= 2;
 		if (++timeout >= QLCNIC_MAX_ROM_WAIT_USEC) {
 			dev_err(&adapter->pdev->dev,
@@ -304,6 +305,8 @@
 static int do_rom_fast_read(struct qlcnic_adapter *adapter,
 			    u32 addr, u32 *valp)
 {
+	int err = 0;
+
 	QLCWR32(adapter, QLCNIC_ROMUSB_ROM_ADDRESS, addr);
 	QLCWR32(adapter, QLCNIC_ROMUSB_ROM_DUMMY_BYTE_CNT, 0);
 	QLCWR32(adapter, QLCNIC_ROMUSB_ROM_ABYTE_CNT, 3);
@@ -317,7 +320,9 @@
 	udelay(10);
 	QLCWR32(adapter, QLCNIC_ROMUSB_ROM_DUMMY_BYTE_CNT, 0);
 
-	*valp = QLCRD32(adapter, QLCNIC_ROMUSB_ROM_RDATA);
+	*valp = QLCRD32(adapter, QLCNIC_ROMUSB_ROM_RDATA, &err);
+	if (err == -EIO)
+		return err;
 	return 0;
 }
 
@@ -369,11 +374,11 @@
 
 int qlcnic_pinit_from_rom(struct qlcnic_adapter *adapter)
 {
-	int addr, val;
+	int addr, err = 0;
 	int i, n, init_delay;
 	struct crb_addr_pair *buf;
 	unsigned offset;
-	u32 off;
+	u32 off, val;
 	struct pci_dev *pdev = adapter->pdev;
 
 	QLC_SHARED_REG_WR32(adapter, QLCNIC_CMDPEG_STATE, 0);
@@ -402,7 +407,9 @@
 	QLCWR32(adapter, QLCNIC_CRB_NIU + 0xb0000, 0x00);
 
 	/* halt sre */
-	val = QLCRD32(adapter, QLCNIC_CRB_SRE + 0x1000);
+	val = QLCRD32(adapter, QLCNIC_CRB_SRE + 0x1000, &err);
+	if (err == -EIO)
+		return err;
 	QLCWR32(adapter, QLCNIC_CRB_SRE + 0x1000, val & (~(0x1)));
 
 	/* halt epg */
@@ -719,10 +726,12 @@
 static int
 qlcnic_has_mn(struct qlcnic_adapter *adapter)
 {
-	u32 capability;
-	capability = 0;
+	u32 capability = 0;
+	int err = 0;
 
-	capability = QLCRD32(adapter, QLCNIC_PEG_TUNE_CAPABILITY);
+	capability = QLCRD32(adapter, QLCNIC_PEG_TUNE_CAPABILITY, &err);
+	if (err == -EIO)
+		return err;
 	if (capability & QLCNIC_PEG_TUNE_MN_PRESENT)
 		return 1;
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index d3f8797..6946d35 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -161,36 +161,68 @@
 	return (qlcnic_get_sts_status(sts_data) == STATUS_CKSUM_LOOP) ? 1 : 0;
 }
 
+static void qlcnic_delete_rx_list_mac(struct qlcnic_adapter *adapter,
+				      struct qlcnic_filter *fil,
+				      void *addr, u16 vlan_id)
+{
+	int ret;
+	u8 op;
+
+	op = vlan_id ? QLCNIC_MAC_VLAN_ADD : QLCNIC_MAC_ADD;
+	ret = qlcnic_sre_macaddr_change(adapter, addr, vlan_id, op);
+	if (ret)
+		return;
+
+	op = vlan_id ? QLCNIC_MAC_VLAN_DEL : QLCNIC_MAC_DEL;
+	ret = qlcnic_sre_macaddr_change(adapter, addr, vlan_id, op);
+	if (!ret) {
+		hlist_del(&fil->fnode);
+		adapter->rx_fhash.fnum--;
+	}
+}
+
+static struct qlcnic_filter *qlcnic_find_mac_filter(struct hlist_head *head,
+						    void *addr, u16 vlan_id)
+{
+	struct qlcnic_filter *tmp_fil = NULL;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
+		if (!memcmp(tmp_fil->faddr, addr, ETH_ALEN) &&
+		    tmp_fil->vlan_id == vlan_id)
+			return tmp_fil;
+	}
+
+	return NULL;
+}
+
 void qlcnic_add_lb_filter(struct qlcnic_adapter *adapter, struct sk_buff *skb,
 			  int loopback_pkt, u16 vlan_id)
 {
 	struct ethhdr *phdr = (struct ethhdr *)(skb->data);
 	struct qlcnic_filter *fil, *tmp_fil;
-	struct hlist_node *n;
 	struct hlist_head *head;
 	unsigned long time;
 	u64 src_addr = 0;
-	u8 hindex, found = 0, op;
+	u8 hindex, op;
 	int ret;
 
 	memcpy(&src_addr, phdr->h_source, ETH_ALEN);
+	hindex = qlcnic_mac_hash(src_addr) &
+		 (adapter->fhash.fbucket_size - 1);
 
 	if (loopback_pkt) {
 		if (adapter->rx_fhash.fnum >= adapter->rx_fhash.fmax)
 			return;
 
-		hindex = qlcnic_mac_hash(src_addr) &
-			 (adapter->fhash.fbucket_size - 1);
 		head = &(adapter->rx_fhash.fhead[hindex]);
 
-		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
-			if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) &&
-			    tmp_fil->vlan_id == vlan_id) {
-				time = tmp_fil->ftime;
-				if (jiffies > (QLCNIC_READD_AGE * HZ + time))
-					tmp_fil->ftime = jiffies;
-				return;
-			}
+		tmp_fil = qlcnic_find_mac_filter(head, &src_addr, vlan_id);
+		if (tmp_fil) {
+			time = tmp_fil->ftime;
+			if (time_after(jiffies, QLCNIC_READD_AGE * HZ + time))
+				tmp_fil->ftime = jiffies;
+			return;
 		}
 
 		fil = kzalloc(sizeof(struct qlcnic_filter), GFP_ATOMIC);
@@ -205,36 +237,37 @@
 		adapter->rx_fhash.fnum++;
 		spin_unlock(&adapter->rx_mac_learn_lock);
 	} else {
-		hindex = qlcnic_mac_hash(src_addr) &
-			 (adapter->fhash.fbucket_size - 1);
-		head = &(adapter->rx_fhash.fhead[hindex]);
-		spin_lock(&adapter->rx_mac_learn_lock);
-		hlist_for_each_entry_safe(tmp_fil, n, head, fnode) {
-			if (!memcmp(tmp_fil->faddr, &src_addr, ETH_ALEN) &&
-			    tmp_fil->vlan_id == vlan_id) {
-				found = 1;
-				break;
-			}
-		}
+		head = &adapter->fhash.fhead[hindex];
 
-		if (!found) {
-			spin_unlock(&adapter->rx_mac_learn_lock);
-			return;
-		}
+		spin_lock(&adapter->mac_learn_lock);
 
-		op = vlan_id ? QLCNIC_MAC_VLAN_ADD : QLCNIC_MAC_ADD;
-		ret = qlcnic_sre_macaddr_change(adapter, (u8 *)&src_addr,
-						vlan_id, op);
-		if (!ret) {
+		tmp_fil = qlcnic_find_mac_filter(head, &src_addr, vlan_id);
+		if (tmp_fil) {
 			op = vlan_id ? QLCNIC_MAC_VLAN_DEL : QLCNIC_MAC_DEL;
 			ret = qlcnic_sre_macaddr_change(adapter,
 							(u8 *)&src_addr,
 							vlan_id, op);
 			if (!ret) {
-				hlist_del(&(tmp_fil->fnode));
-				adapter->rx_fhash.fnum--;
+				hlist_del(&tmp_fil->fnode);
+				adapter->fhash.fnum--;
 			}
+
+			spin_unlock(&adapter->mac_learn_lock);
+
+			return;
 		}
+
+		spin_unlock(&adapter->mac_learn_lock);
+
+		head = &adapter->rx_fhash.fhead[hindex];
+
+		spin_lock(&adapter->rx_mac_learn_lock);
+
+		tmp_fil = qlcnic_find_mac_filter(head, &src_addr, vlan_id);
+		if (tmp_fil)
+			qlcnic_delete_rx_list_mac(adapter, tmp_fil, &src_addr,
+						  vlan_id);
+
 		spin_unlock(&adapter->rx_mac_learn_lock);
 	}
 }
@@ -262,7 +295,7 @@
 
 	mac_req = (struct qlcnic_mac_req *)&(req->words[0]);
 	mac_req->op = vlan_id ? QLCNIC_MAC_VLAN_ADD : QLCNIC_MAC_ADD;
-	memcpy(mac_req->mac_addr, &uaddr, ETH_ALEN);
+	memcpy(mac_req->mac_addr, uaddr, ETH_ALEN);
 
 	vlan_req = (struct qlcnic_vlan_req *)&req->words[1];
 	vlan_req->vlan_id = cpu_to_le16(vlan_id);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 4528f8e..bc05d01 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -977,8 +977,8 @@
 static int
 qlcnic_initialize_nic(struct qlcnic_adapter *adapter)
 {
-	int err;
 	struct qlcnic_info nic_info;
+	int err = 0;
 
 	memset(&nic_info, 0, sizeof(struct qlcnic_info));
 	err = qlcnic_get_nic_info(adapter, &nic_info, adapter->ahw->pci_func);
@@ -993,7 +993,9 @@
 
 	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) {
 		u32 temp;
-		temp = QLCRD32(adapter, CRB_FW_CAPABILITIES_2);
+		temp = QLCRD32(adapter, CRB_FW_CAPABILITIES_2, &err);
+		if (err == -EIO)
+			return err;
 		adapter->ahw->extra_capability[0] = temp;
 	}
 	adapter->ahw->max_mac_filters = nic_info.max_mac_filters;
@@ -1383,6 +1385,8 @@
 	if (adapter->ahw->diag_test == QLCNIC_INTERRUPT_TEST) {
 		if (qlcnic_82xx_check(adapter))
 			handler = qlcnic_tmp_intr;
+		else
+			handler = qlcnic_83xx_tmp_intr;
 		if (!QLCNIC_IS_MSI_FAMILY(adapter))
 			flags |= IRQF_SHARED;
 
@@ -1531,12 +1535,12 @@
 	if (netdev->features & NETIF_F_LRO)
 		qlcnic_config_hw_lro(adapter, QLCNIC_LRO_ENABLED);
 
+	set_bit(__QLCNIC_DEV_UP, &adapter->state);
 	qlcnic_napi_enable(adapter);
 
 	qlcnic_linkevent_request(adapter, 1);
 
 	adapter->ahw->reset_context = 0;
-	set_bit(__QLCNIC_DEV_UP, &adapter->state);
 	return 0;
 }
 
@@ -2139,7 +2143,7 @@
 	if (qlcnic_83xx_check(adapter) && !qlcnic_use_msi_x &&
 	    !!qlcnic_use_msi)
 		dev_warn(&pdev->dev,
-			 "83xx adapter do not support MSI interrupts\n");
+			 "Device does not support MSI interrupts\n");
 
 	err = qlcnic_setup_intr(adapter, 0);
 	if (err) {
@@ -2161,7 +2165,8 @@
 	if (err)
 		goto err_out_disable_mbx_intr;
 
-	qlcnic_set_drv_version(adapter);
+	if (adapter->portnum == 0)
+		qlcnic_set_drv_version(adapter);
 
 	pci_set_drvdata(pdev, adapter);
 
@@ -3081,7 +3086,8 @@
 	adapter->fw_fail_cnt = 0;
 	adapter->flags &= ~QLCNIC_FW_HANG;
 	clear_bit(__QLCNIC_RESETTING, &adapter->state);
-	qlcnic_set_drv_version(adapter);
+	if (adapter->portnum == 0)
+		qlcnic_set_drv_version(adapter);
 
 	if (!qlcnic_clr_drv_state(adapter))
 		qlcnic_schedule_work(adapter, qlcnic_fw_poll_work,
@@ -3093,6 +3099,7 @@
 {
 	u32 state = 0, heartbeat;
 	u32 peg_status;
+	int err = 0;
 
 	if (qlcnic_check_temp(adapter))
 		goto detach;
@@ -3139,11 +3146,11 @@
 			"PEG_NET_4_PC: 0x%x\n",
 			peg_status,
 			QLC_SHARED_REG_RD32(adapter, QLCNIC_PEG_HALT_STATUS2),
-			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_0 + 0x3c),
-			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_1 + 0x3c),
-			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_2 + 0x3c),
-			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_3 + 0x3c),
-			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_4 + 0x3c));
+			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_0 + 0x3c, &err),
+			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_1 + 0x3c, &err),
+			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_2 + 0x3c, &err),
+			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_3 + 0x3c, &err),
+			QLCRD32(adapter, QLCNIC_CRB_PEG_NET_4 + 0x3c, &err));
 	if (QLCNIC_FWERROR_CODE(peg_status) == 0x67)
 		dev_err(&adapter->pdev->dev,
 			"Firmware aborted with error code 0x00006700. "
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
index ab8a674..79e54ef 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
@@ -1084,7 +1084,7 @@
 	tmpl_hdr = ahw->fw_dump.tmpl_hdr;
 	tmpl_hdr->drv_cap_mask = QLCNIC_DUMP_MASK_DEF;
 
-	if ((tmpl_hdr->version & 0xffffff) >= 0x20001)
+	if ((tmpl_hdr->version & 0xfffff) >= 0x20001)
 		ahw->fw_dump.use_pex_dma = true;
 	else
 		ahw->fw_dump.use_pex_dma = false;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index 62380ce..5d40045 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -562,7 +562,7 @@
 	INIT_LIST_HEAD(&adapter->vf_mc_list);
 	if (!qlcnic_use_msi_x && !!qlcnic_use_msi)
 		dev_warn(&adapter->pdev->dev,
-			 "83xx adapter do not support MSI interrupts\n");
+			 "Device does not support MSI interrupts\n");
 
 	err = qlcnic_setup_intr(adapter, 1);
 	if (err) {
@@ -762,6 +762,7 @@
 			memset(mbx->rsp.arg, 0, sizeof(u32) * mbx->rsp.num);
 			mbx->req.arg[0] = (type | (mbx->req.num << 16) |
 					   (3 << 29));
+			mbx->rsp.arg[0] = (type & 0xffff) | mbx->rsp.num << 16;
 			return 0;
 		}
 	}
@@ -813,6 +814,7 @@
 		cmd->req.num = trans->req_pay_size / 4;
 		cmd->rsp.num = trans->rsp_pay_size / 4;
 		hdr = trans->rsp_hdr;
+		cmd->op_type = trans->req_hdr->op_type;
 	}
 
 	trans->trans_id = seq;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
index ee0c1d3..eb49cd6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c
@@ -635,12 +635,12 @@
 					   struct qlcnic_cmd_args *cmd)
 {
 	struct qlcnic_vf_info *vf = trans->vf;
-	struct qlcnic_adapter *adapter = vf->adapter;
-	int err;
+	struct qlcnic_vport *vp = vf->vp;
+	struct qlcnic_adapter *adapter;
 	u16 func = vf->pci_func;
+	int err;
 
-	cmd->rsp.arg[0] = trans->req_hdr->cmd_op;
-	cmd->rsp.arg[0] |= (1 << 16);
+	adapter = vf->adapter;
 
 	if (trans->req_hdr->cmd_op == QLCNIC_BC_CMD_CHANNEL_INIT) {
 		err = qlcnic_sriov_pf_config_vport(adapter, 1, func);
@@ -650,6 +650,8 @@
 				qlcnic_sriov_pf_config_vport(adapter, 0, func);
 		}
 	} else {
+		if (vp->vlan_mode == QLC_GUEST_VLAN_MODE)
+			vp->vlan = 0;
 		err = qlcnic_sriov_pf_config_vport(adapter, 0, func);
 	}
 
@@ -1183,7 +1185,7 @@
 	u8 cmd_op, mode = vp->vlan_mode;
 
 	cmd_op = trans->req_hdr->cmd_op;
-	cmd->rsp.arg[0] = (cmd_op & 0xffff) | 14 << 16 | 1 << 25;
+	cmd->rsp.arg[0] |= 1 << 25;
 
 	switch (mode) {
 	case QLC_GUEST_VLAN_MODE:
@@ -1561,6 +1563,7 @@
 				struct qlcnic_vf_info *vf)
 {
 	struct net_device *dev = vf->adapter->netdev;
+	struct qlcnic_vport *vp = vf->vp;
 
 	if (!test_and_clear_bit(QLC_BC_VF_STATE, &vf->state)) {
 		clear_bit(QLC_BC_VF_FLR, &vf->state);
@@ -1573,6 +1576,9 @@
 		return;
 	}
 
+	if (vp->vlan_mode == QLC_GUEST_VLAN_MODE)
+		vp->vlan = 0;
+
 	qlcnic_sriov_schedule_flr(sriov, vf, qlcnic_sriov_pf_process_flr);
 	netdev_info(dev, "FLR received for PCI func %d\n", vf->pci_func);
 }
@@ -1621,13 +1627,15 @@
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	struct qlcnic_sriov *sriov = adapter->ahw->sriov;
-	int i, num_vfs = sriov->num_vfs;
+	int i, num_vfs;
 	struct qlcnic_vf_info *vf_info;
 	u8 *curr_mac;
 
 	if (!qlcnic_sriov_pf_check(adapter))
 		return -EOPNOTSUPP;
 
+	num_vfs = sriov->num_vfs;
+
 	if (!is_valid_ether_addr(mac) || vf >= num_vfs)
 		return -EINVAL;
 
@@ -1741,6 +1749,7 @@
 
 	switch (vlan) {
 	case 4095:
+		vp->vlan = 0;
 		vp->vlan_mode = QLC_GUEST_VLAN_MODE;
 		break;
 	case 0:
@@ -1759,6 +1768,29 @@
 	return 0;
 }
 
+static inline __u32 qlcnic_sriov_get_vf_vlan(struct qlcnic_adapter *adapter,
+					     struct qlcnic_vport *vp, int vf)
+{
+	__u32 vlan = 0;
+
+	switch (vp->vlan_mode) {
+	case QLC_PVID_MODE:
+		vlan = vp->vlan;
+		break;
+	case QLC_GUEST_VLAN_MODE:
+		vlan = MAX_VLAN_ID;
+		break;
+	case QLC_NO_VLAN_MODE:
+		vlan = 0;
+		break;
+	default:
+		netdev_info(adapter->netdev, "Invalid VLAN mode = %d for VF %d\n",
+			    vp->vlan_mode, vf);
+	}
+
+	return vlan;
+}
+
 int qlcnic_sriov_get_vf_config(struct net_device *netdev,
 			       int vf, struct ifla_vf_info *ivi)
 {
@@ -1774,7 +1806,7 @@
 
 	vp = sriov->vf_info[vf].vp;
 	memcpy(&ivi->mac, vp->mac, ETH_ALEN);
-	ivi->vlan = vp->vlan;
+	ivi->vlan = qlcnic_sriov_get_vf_vlan(adapter, vp, vf);
 	ivi->qos = vp->qos;
 	ivi->spoofchk = vp->spoofchk;
 	if (vp->max_tx_bw == MAX_BW)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 10ed82b..660c3f5 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -170,9 +170,9 @@
 
 	if (ahw->extra_capability[0] & QLCNIC_FW_CAPABILITY_2_BEACON) {
 		err = qlcnic_get_beacon_state(adapter, &h_beacon_state);
-		if (!err) {
-			dev_info(&adapter->pdev->dev,
-				 "Failed to get current beacon state\n");
+		if (err) {
+			netdev_err(adapter->netdev,
+				   "Failed to get current beacon state\n");
 		} else {
 			if (h_beacon_state == QLCNIC_BEACON_DISABLE)
 				ahw->beacon_state = 0;
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index e6acb9f..d2e5919 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -478,7 +478,7 @@
 
 	while (1) {
 		u32 status, len;
-		dma_addr_t mapping;
+		dma_addr_t mapping, new_mapping;
 		struct sk_buff *skb, *new_skb;
 		struct cp_desc *desc;
 		const unsigned buflen = cp->rx_buf_sz;
@@ -520,6 +520,14 @@
 			goto rx_next;
 		}
 
+		new_mapping = dma_map_single(&cp->pdev->dev, new_skb->data, buflen,
+					 PCI_DMA_FROMDEVICE);
+		if (dma_mapping_error(&cp->pdev->dev, new_mapping)) {
+			dev->stats.rx_dropped++;
+			kfree_skb(new_skb);
+			goto rx_next;
+		}
+
 		dma_unmap_single(&cp->pdev->dev, mapping,
 				 buflen, PCI_DMA_FROMDEVICE);
 
@@ -531,12 +539,11 @@
 
 		skb_put(skb, len);
 
-		mapping = dma_map_single(&cp->pdev->dev, new_skb->data, buflen,
-					 PCI_DMA_FROMDEVICE);
 		cp->rx_skb[rx_tail] = new_skb;
 
 		cp_rx_skb(cp, skb, desc);
 		rx++;
+		mapping = new_mapping;
 
 rx_next:
 		cp->rx_ring[rx_tail].opts2 = 0;
@@ -716,6 +723,22 @@
 		TxVlanTag | swab16(vlan_tx_tag_get(skb)) : 0x00;
 }
 
+static void unwind_tx_frag_mapping(struct cp_private *cp, struct sk_buff *skb,
+				   int first, int entry_last)
+{
+	int frag, index;
+	struct cp_desc *txd;
+	skb_frag_t *this_frag;
+	for (frag = 0; frag+first < entry_last; frag++) {
+		index = first+frag;
+		cp->tx_skb[index] = NULL;
+		txd = &cp->tx_ring[index];
+		this_frag = &skb_shinfo(skb)->frags[frag];
+		dma_unmap_single(&cp->pdev->dev, le64_to_cpu(txd->addr),
+				 skb_frag_size(this_frag), PCI_DMA_TODEVICE);
+	}
+}
+
 static netdev_tx_t cp_start_xmit (struct sk_buff *skb,
 					struct net_device *dev)
 {
@@ -749,6 +772,9 @@
 
 		len = skb->len;
 		mapping = dma_map_single(&cp->pdev->dev, skb->data, len, PCI_DMA_TODEVICE);
+		if (dma_mapping_error(&cp->pdev->dev, mapping))
+			goto out_dma_error;
+
 		txd->opts2 = opts2;
 		txd->addr = cpu_to_le64(mapping);
 		wmb();
@@ -786,6 +812,9 @@
 		first_len = skb_headlen(skb);
 		first_mapping = dma_map_single(&cp->pdev->dev, skb->data,
 					       first_len, PCI_DMA_TODEVICE);
+		if (dma_mapping_error(&cp->pdev->dev, first_mapping))
+			goto out_dma_error;
+
 		cp->tx_skb[entry] = skb;
 		entry = NEXT_TX(entry);
 
@@ -799,6 +828,11 @@
 			mapping = dma_map_single(&cp->pdev->dev,
 						 skb_frag_address(this_frag),
 						 len, PCI_DMA_TODEVICE);
+			if (dma_mapping_error(&cp->pdev->dev, mapping)) {
+				unwind_tx_frag_mapping(cp, skb, first_entry, entry);
+				goto out_dma_error;
+			}
+
 			eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0;
 
 			ctrl = eor | len | DescOwn;
@@ -859,11 +893,16 @@
 	if (TX_BUFFS_AVAIL(cp) <= (MAX_SKB_FRAGS + 1))
 		netif_stop_queue(dev);
 
+out_unlock:
 	spin_unlock_irqrestore(&cp->lock, intr_flags);
 
 	cpw8(TxPoll, NormalTxPoll);
 
 	return NETDEV_TX_OK;
+out_dma_error:
+	kfree_skb(skb);
+	cp->dev->stats.tx_dropped++;
+	goto out_unlock;
 }
 
 /* Set or clear the multicast filter for this adaptor.
@@ -1054,6 +1093,10 @@
 
 		mapping = dma_map_single(&cp->pdev->dev, skb->data,
 					 cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
+		if (dma_mapping_error(&cp->pdev->dev, mapping)) {
+			kfree_skb(skb);
+			goto err_out;
+		}
 		cp->rx_skb[i] = skb;
 
 		cp->rx_ring[i].opts2 = 0;
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 4106a74..85e5c97 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -3689,7 +3689,7 @@
 	if (tp->link_ok(ioaddr))
 		return;
 
-	netif_warn(tp, link, tp->dev, "PHY reset until link up\n");
+	netif_dbg(tp, link, tp->dev, "PHY reset until link up\n");
 
 	tp->phy_reset_enable(tp);
 
@@ -6468,6 +6468,8 @@
 	rtl8169_down(dev);
 	rtl_unlock_work(tp);
 
+	cancel_work_sync(&tp->wk.work);
+
 	free_irq(pdev->irq, dev);
 
 	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
@@ -6793,8 +6795,6 @@
 		rtl8168_driver_stop(tp);
 	}
 
-	cancel_work_sync(&tp->wk.work);
-
 	netif_napi_del(&tp->napi);
 
 	unregister_netdev(dev);
@@ -7088,7 +7088,7 @@
 
 	RTL_W8(Cfg9346, Cfg9346_Unlock);
 	RTL_W8(Config1, RTL_R8(Config1) | PMEnable);
-	RTL_W8(Config5, RTL_R8(Config5) & PMEStatus);
+	RTL_W8(Config5, RTL_R8(Config5) & (BWF | MWF | UWF | LanWake | PMEStatus));
 	if ((RTL_R8(Config3) & (LinkUp | MagicPacket)) != 0)
 		tp->features |= RTL_FEATURE_WOL;
 	if ((RTL_R8(Config5) & (UWF | BWF | MWF)) != 0)
diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c
index b74a60a..30d7442 100644
--- a/drivers/net/ethernet/sfc/filter.c
+++ b/drivers/net/ethernet/sfc/filter.c
@@ -675,7 +675,7 @@
 		BUILD_BUG_ON(EFX_FILTER_INDEX_UC_DEF != 0);
 		BUILD_BUG_ON(EFX_FILTER_INDEX_MC_DEF !=
 			     EFX_FILTER_MC_DEF - EFX_FILTER_UC_DEF);
-		rep_index = spec->type - EFX_FILTER_INDEX_UC_DEF;
+		rep_index = spec->type - EFX_FILTER_UC_DEF;
 		ins_index = rep_index;
 
 		spin_lock_bh(&state->lock);
@@ -1209,7 +1209,9 @@
 	EFX_BUG_ON_PARANOID(skb_headlen(skb) < nhoff + 4 * ip->ihl + 4);
 	ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl);
 
-	efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT, 0, rxq_index);
+	efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT,
+			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
+			   rxq_index);
 	rc = efx_filter_set_ipv4_full(&spec, ip->protocol,
 				      ip->daddr, ports[1], ip->saddr, ports[0]);
 	if (rc)
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index eb4aea3..f5d7ad7 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -1318,7 +1318,7 @@
 		if (duplex){
 			sis900_set_mode(sis_priv, speed, duplex);
 			sis630_set_eq(net_dev, sis_priv->chipset_rev);
-			netif_start_queue(net_dev);
+			netif_carrier_on(net_dev);
 		}
 
 		sis_priv->timer.expires = jiffies + HZ;
@@ -1336,10 +1336,8 @@
 		status = sis900_default_phy(net_dev);
 		mii_phy = sis_priv->mii;
 
-		if (status & MII_STAT_LINK){
+		if (status & MII_STAT_LINK)
 			sis900_check_mode(net_dev, mii_phy);
-			netif_carrier_on(net_dev);
-		}
 	} else {
 	/* Link ON -> OFF */
                 if (!(status & MII_STAT_LINK)){
@@ -1612,12 +1610,6 @@
 	unsigned int  index_cur_tx, index_dirty_tx;
 	unsigned int  count_dirty_tx;
 
-	/* Don't transmit data before the complete of auto-negotiation */
-	if(!sis_priv->autong_complete){
-		netif_stop_queue(net_dev);
-		return NETDEV_TX_BUSY;
-	}
-
 	spin_lock_irqsave(&sis_priv->lock, flags);
 
 	/* Calculate the next Tx descriptor entry. */
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index c9d942a..1ef9d8a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -33,10 +33,15 @@
 	struct stmmac_priv *priv = (struct stmmac_priv *)p;
 	unsigned int txsize = priv->dma_tx_size;
 	unsigned int entry = priv->cur_tx % txsize;
-	struct dma_desc *desc = priv->dma_tx + entry;
+	struct dma_desc *desc;
 	unsigned int nopaged_len = skb_headlen(skb);
 	unsigned int bmax, len;
 
+	if (priv->extend_desc)
+		desc = (struct dma_desc *)(priv->dma_etx + entry);
+	else
+		desc = priv->dma_tx + entry;
+
 	if (priv->plat->enh_desc)
 		bmax = BUF_SIZE_8KiB;
 	else
@@ -54,7 +59,11 @@
 						STMMAC_RING_MODE);
 		wmb();
 		entry = (++priv->cur_tx) % txsize;
-		desc = priv->dma_tx + entry;
+
+		if (priv->extend_desc)
+			desc = (struct dma_desc *)(priv->dma_etx + entry);
+		else
+			desc = priv->dma_tx + entry;
 
 		desc->des2 = dma_map_single(priv->device, skb->data + bmax,
 					    len, DMA_TO_DEVICE);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f2ccb36..0a9bb9d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -939,15 +939,20 @@
 
 	skb = __netdev_alloc_skb(priv->dev, priv->dma_buf_sz + NET_IP_ALIGN,
 				 GFP_KERNEL);
-	if (unlikely(skb == NULL)) {
+	if (!skb) {
 		pr_err("%s: Rx init fails; skb is NULL\n", __func__);
-		return 1;
+		return -ENOMEM;
 	}
 	skb_reserve(skb, NET_IP_ALIGN);
 	priv->rx_skbuff[i] = skb;
 	priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
 						priv->dma_buf_sz,
 						DMA_FROM_DEVICE);
+	if (dma_mapping_error(priv->device, priv->rx_skbuff_dma[i])) {
+		pr_err("%s: DMA mapping error\n", __func__);
+		dev_kfree_skb_any(skb);
+		return -EINVAL;
+	}
 
 	p->des2 = priv->rx_skbuff_dma[i];
 
@@ -958,6 +963,16 @@
 	return 0;
 }
 
+static void stmmac_free_rx_buffers(struct stmmac_priv *priv, int i)
+{
+	if (priv->rx_skbuff[i]) {
+		dma_unmap_single(priv->device, priv->rx_skbuff_dma[i],
+				 priv->dma_buf_sz, DMA_FROM_DEVICE);
+		dev_kfree_skb_any(priv->rx_skbuff[i]);
+	}
+	priv->rx_skbuff[i] = NULL;
+}
+
 /**
  * init_dma_desc_rings - init the RX/TX descriptor rings
  * @dev: net device structure
@@ -965,13 +980,14 @@
  * and allocates the socket buffers. It suppors the chained and ring
  * modes.
  */
-static void init_dma_desc_rings(struct net_device *dev)
+static int init_dma_desc_rings(struct net_device *dev)
 {
 	int i;
 	struct stmmac_priv *priv = netdev_priv(dev);
 	unsigned int txsize = priv->dma_tx_size;
 	unsigned int rxsize = priv->dma_rx_size;
 	unsigned int bfsize = 0;
+	int ret = -ENOMEM;
 
 	/* Set the max buffer size according to the DESC mode
 	 * and the MTU. Note that RING mode allows 16KiB bsize.
@@ -992,34 +1008,60 @@
 							  dma_extended_desc),
 						   &priv->dma_rx_phy,
 						   GFP_KERNEL);
+		if (!priv->dma_erx)
+			goto err_dma;
+
 		priv->dma_etx = dma_alloc_coherent(priv->device, txsize *
 						   sizeof(struct
 							  dma_extended_desc),
 						   &priv->dma_tx_phy,
 						   GFP_KERNEL);
-		if ((!priv->dma_erx) || (!priv->dma_etx))
-			return;
+		if (!priv->dma_etx) {
+			dma_free_coherent(priv->device, priv->dma_rx_size *
+					sizeof(struct dma_extended_desc),
+					priv->dma_erx, priv->dma_rx_phy);
+			goto err_dma;
+		}
 	} else {
 		priv->dma_rx = dma_alloc_coherent(priv->device, rxsize *
 						  sizeof(struct dma_desc),
 						  &priv->dma_rx_phy,
 						  GFP_KERNEL);
+		if (!priv->dma_rx)
+			goto err_dma;
+
 		priv->dma_tx = dma_alloc_coherent(priv->device, txsize *
 						  sizeof(struct dma_desc),
 						  &priv->dma_tx_phy,
 						  GFP_KERNEL);
-		if ((!priv->dma_rx) || (!priv->dma_tx))
-			return;
+		if (!priv->dma_tx) {
+			dma_free_coherent(priv->device, priv->dma_rx_size *
+					sizeof(struct dma_desc),
+					priv->dma_rx, priv->dma_rx_phy);
+			goto err_dma;
+		}
 	}
 
 	priv->rx_skbuff_dma = kmalloc_array(rxsize, sizeof(dma_addr_t),
 					    GFP_KERNEL);
+	if (!priv->rx_skbuff_dma)
+		goto err_rx_skbuff_dma;
+
 	priv->rx_skbuff = kmalloc_array(rxsize, sizeof(struct sk_buff *),
 					GFP_KERNEL);
+	if (!priv->rx_skbuff)
+		goto err_rx_skbuff;
+
 	priv->tx_skbuff_dma = kmalloc_array(txsize, sizeof(dma_addr_t),
 					    GFP_KERNEL);
+	if (!priv->tx_skbuff_dma)
+		goto err_tx_skbuff_dma;
+
 	priv->tx_skbuff = kmalloc_array(txsize, sizeof(struct sk_buff *),
 					GFP_KERNEL);
+	if (!priv->tx_skbuff)
+		goto err_tx_skbuff;
+
 	if (netif_msg_probe(priv)) {
 		pr_debug("(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n", __func__,
 			 (u32) priv->dma_rx_phy, (u32) priv->dma_tx_phy);
@@ -1034,8 +1076,9 @@
 		else
 			p = priv->dma_rx + i;
 
-		if (stmmac_init_rx_buffers(priv, p, i))
-			break;
+		ret = stmmac_init_rx_buffers(priv, p, i);
+		if (ret)
+			goto err_init_rx_buffers;
 
 		if (netif_msg_probe(priv))
 			pr_debug("[%p]\t[%p]\t[%x]\n", priv->rx_skbuff[i],
@@ -1081,20 +1124,44 @@
 
 	if (netif_msg_hw(priv))
 		stmmac_display_rings(priv);
+
+	return 0;
+err_init_rx_buffers:
+	while (--i >= 0)
+		stmmac_free_rx_buffers(priv, i);
+	kfree(priv->tx_skbuff);
+err_tx_skbuff:
+	kfree(priv->tx_skbuff_dma);
+err_tx_skbuff_dma:
+	kfree(priv->rx_skbuff);
+err_rx_skbuff:
+	kfree(priv->rx_skbuff_dma);
+err_rx_skbuff_dma:
+	if (priv->extend_desc) {
+		dma_free_coherent(priv->device, priv->dma_tx_size *
+				  sizeof(struct dma_extended_desc),
+				  priv->dma_etx, priv->dma_tx_phy);
+		dma_free_coherent(priv->device, priv->dma_rx_size *
+				  sizeof(struct dma_extended_desc),
+				  priv->dma_erx, priv->dma_rx_phy);
+	} else {
+		dma_free_coherent(priv->device,
+				priv->dma_tx_size * sizeof(struct dma_desc),
+				priv->dma_tx, priv->dma_tx_phy);
+		dma_free_coherent(priv->device,
+				priv->dma_rx_size * sizeof(struct dma_desc),
+				priv->dma_rx, priv->dma_rx_phy);
+	}
+err_dma:
+	return ret;
 }
 
 static void dma_free_rx_skbufs(struct stmmac_priv *priv)
 {
 	int i;
 
-	for (i = 0; i < priv->dma_rx_size; i++) {
-		if (priv->rx_skbuff[i]) {
-			dma_unmap_single(priv->device, priv->rx_skbuff_dma[i],
-					 priv->dma_buf_sz, DMA_FROM_DEVICE);
-			dev_kfree_skb_any(priv->rx_skbuff[i]);
-		}
-		priv->rx_skbuff[i] = NULL;
-	}
+	for (i = 0; i < priv->dma_rx_size; i++)
+		stmmac_free_rx_buffers(priv, i);
 }
 
 static void dma_free_tx_skbufs(struct stmmac_priv *priv)
@@ -1560,12 +1627,17 @@
 	priv->dma_tx_size = STMMAC_ALIGN(dma_txsize);
 	priv->dma_rx_size = STMMAC_ALIGN(dma_rxsize);
 	priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
-	init_dma_desc_rings(dev);
+
+	ret = init_dma_desc_rings(dev);
+	if (ret < 0) {
+		pr_err("%s: DMA descriptors initialization failed\n", __func__);
+		goto dma_desc_error;
+	}
 
 	/* DMA initialization and SW reset */
 	ret = stmmac_init_dma_engine(priv);
 	if (ret < 0) {
-		pr_err("%s: DMA initialization failed\n", __func__);
+		pr_err("%s: DMA engine initialization failed\n", __func__);
 		goto init_error;
 	}
 
@@ -1672,6 +1744,7 @@
 
 init_error:
 	free_dma_desc_resources(priv);
+dma_desc_error:
 	if (priv->phydev)
 		phy_disconnect(priv->phydev);
 phy_error:
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 05a1674..22a7a43 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1867,7 +1867,7 @@
 
 	while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k))) {
 		for (i = res->start; i <= res->end; i++) {
-			if (request_irq(i, cpsw_interrupt, IRQF_DISABLED,
+			if (request_irq(i, cpsw_interrupt, 0,
 					dev_name(&pdev->dev), priv)) {
 				dev_err(priv->dev, "error attaching irq\n");
 				goto clean_ale_ret;
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 07b176b..1a222bce 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1568,8 +1568,7 @@
 	while ((res = platform_get_resource(priv->pdev, IORESOURCE_IRQ, k))) {
 		for (i = res->start; i <= res->end; i++) {
 			if (devm_request_irq(&priv->pdev->dev, i, emac_irq,
-					     IRQF_DISABLED,
-					     ndev->name, ndev))
+					     0, ndev->name, ndev))
 				goto rollback;
 		}
 		k++;
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 1d6dc41..d01cacf 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -2100,7 +2100,7 @@
 
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
 	}
-	netif_rx(skb);
+	netif_receive_skb(skb);
 
 	stats->rx_bytes += pkt_len;
 	stats->rx_packets++;
@@ -2884,6 +2884,7 @@
 	return ret;
 
 err_iounmap:
+	netif_napi_del(&vptr->napi);
 	iounmap(regs);
 err_free_dev:
 	free_netdev(netdev);
@@ -2904,6 +2905,7 @@
 	struct velocity_info *vptr = netdev_priv(netdev);
 
 	unregister_netdev(netdev);
+	netif_napi_del(&vptr->napi);
 	iounmap(vptr->mac_regs);
 	free_netdev(netdev);
 	velocity_nics--;
diff --git a/drivers/net/irda/via-ircc.c b/drivers/net/irda/via-ircc.c
index 51f2bc3..2dcc60f 100644
--- a/drivers/net/irda/via-ircc.c
+++ b/drivers/net/irda/via-ircc.c
@@ -210,8 +210,7 @@
 			pci_write_config_byte(pcidev,0x42,(bTmp | 0xf0));
 			pci_write_config_byte(pcidev,0x5a,0xc0);
 			WriteLPCReg(0x28, 0x70 );
-			if (via_ircc_open(pcidev, &info, 0x3076) == 0)
-				rc=0;
+			rc = via_ircc_open(pcidev, &info, 0x3076);
 		} else
 			rc = -ENODEV; //IR not turn on	 
 	} else { //Not VT1211
@@ -249,8 +248,7 @@
 			info.irq=FirIRQ;
 			info.dma=FirDRQ1;
 			info.dma2=FirDRQ0;
-			if (via_ircc_open(pcidev, &info, 0x3096) == 0)
-				rc=0;
+			rc = via_ircc_open(pcidev, &info, 0x3096);
 		} else
 			rc = -ENODEV; //IR not turn on !!!!!
 	}//Not VT1211
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 18373b6..16b43bf 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -337,8 +337,11 @@
 	int err;
 
 	if (vlan->port->passthru) {
-		if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC))
-			dev_set_promiscuity(lowerdev, 1);
+		if (!(vlan->flags & MACVLAN_FLAG_NOPROMISC)) {
+			err = dev_set_promiscuity(lowerdev, 1);
+			if (err < 0)
+				goto out;
+		}
 		goto hash_add;
 	}
 
@@ -736,6 +739,10 @@
 			return -EADDRNOTAVAIL;
 	}
 
+	if (data && data[IFLA_MACVLAN_FLAGS] &&
+	    nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC)
+		return -EINVAL;
+
 	if (data && data[IFLA_MACVLAN_MODE]) {
 		switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) {
 		case MACVLAN_MODE_PRIVATE:
@@ -863,6 +870,18 @@
 		struct nlattr *tb[], struct nlattr *data[])
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
+	enum macvlan_mode mode;
+	bool set_mode = false;
+
+	/* Validate mode, but don't set yet: setting flags may fail. */
+	if (data && data[IFLA_MACVLAN_MODE]) {
+		set_mode = true;
+		mode = nla_get_u32(data[IFLA_MACVLAN_MODE]);
+		/* Passthrough mode can't be set or cleared dynamically */
+		if ((mode == MACVLAN_MODE_PASSTHRU) !=
+		    (vlan->mode == MACVLAN_MODE_PASSTHRU))
+			return -EINVAL;
+	}
 
 	if (data && data[IFLA_MACVLAN_FLAGS]) {
 		__u16 flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
@@ -879,8 +898,8 @@
 		}
 		vlan->flags = flags;
 	}
-	if (data && data[IFLA_MACVLAN_MODE])
-		vlan->mode = nla_get_u32(data[IFLA_MACVLAN_MODE]);
+	if (set_mode)
+		vlan->mode = mode;
 	return 0;
 }
 
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index a98fb0e..ea53abb 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -68,6 +68,8 @@
 #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \
 		      NETIF_F_TSO6 | NETIF_F_UFO)
 #define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
+#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG)
+
 /*
  * RCU usage:
  * The macvtap_queue and the macvlan_dev are loosely coupled, the
@@ -278,7 +280,8 @@
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct macvtap_queue *q = macvtap_get_queue(dev, skb);
-	netdev_features_t features;
+	netdev_features_t features = TAP_FEATURES;
+
 	if (!q)
 		goto drop;
 
@@ -287,9 +290,11 @@
 
 	skb->dev = dev;
 	/* Apply the forward feature mask so that we perform segmentation
-	 * according to users wishes.
+	 * according to users wishes.  This only works if VNET_HDR is
+	 * enabled.
 	 */
-	features = netif_skb_features(skb) & vlan->tap_features;
+	if (q->flags & IFF_VNET_HDR)
+		features |= vlan->tap_features;
 	if (netif_needs_gso(skb, features)) {
 		struct sk_buff *segs = __skb_gso_segment(skb, features, false);
 
@@ -818,10 +823,13 @@
 		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
 		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
 	}
-	if (vlan)
+	if (vlan) {
+		local_bh_disable();
 		macvlan_start_xmit(skb, vlan->dev);
-	else
+		local_bh_enable();
+	} else {
 		kfree_skb(skb);
+	}
 	rcu_read_unlock();
 
 	return total_len;
@@ -912,8 +920,11 @@
 done:
 	rcu_read_lock();
 	vlan = rcu_dereference(q->vlan);
-	if (vlan)
+	if (vlan) {
+		preempt_disable();
 		macvlan_count_rx(vlan, copied - vnet_hdr_len, ret == 0, 0);
+		preempt_enable();
+	}
 	rcu_read_unlock();
 
 	return ret ? ret : copied;
@@ -1058,8 +1069,7 @@
 	/* tap_features are the same as features on tun/tap and
 	 * reflect user expectations.
 	 */
-	vlan->tap_features = vlan->dev->features &
-			    (feature_mask | ~TUN_OFFLOADS);
+	vlan->tap_features = feature_mask;
 	vlan->set_features = features;
 	netdev_update_features(vlan->dev);
 
@@ -1155,10 +1165,6 @@
 			    TUN_F_TSO_ECN | TUN_F_UFO))
 			return -EINVAL;
 
-		/* TODO: only accept frames with the features that
-			 got enabled for forwarded frames */
-		if (!(q->flags & IFF_VNET_HDR))
-			return  -EINVAL;
 		rtnl_lock();
 		ret = set_offload(q, arg);
 		rtnl_unlock();
diff --git a/drivers/net/phy/mdio-sun4i.c b/drivers/net/phy/mdio-sun4i.c
index 61d3f4e..7f25e49 100644
--- a/drivers/net/phy/mdio-sun4i.c
+++ b/drivers/net/phy/mdio-sun4i.c
@@ -40,7 +40,7 @@
 static int sun4i_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 {
 	struct sun4i_mdio_data *data = bus->priv;
-	unsigned long start_jiffies;
+	unsigned long timeout_jiffies;
 	int value;
 
 	/* issue the phy address and reg */
@@ -49,10 +49,9 @@
 	writel(0x1, data->membase + EMAC_MAC_MCMD_REG);
 
 	/* Wait read complete */
-	start_jiffies = jiffies;
+	timeout_jiffies = jiffies + MDIO_TIMEOUT;
 	while (readl(data->membase + EMAC_MAC_MIND_REG) & 0x1) {
-		if (time_after(start_jiffies,
-			       start_jiffies + MDIO_TIMEOUT))
+		if (time_is_before_jiffies(timeout_jiffies))
 			return -ETIMEDOUT;
 		msleep(1);
 	}
@@ -69,7 +68,7 @@
 			    u16 value)
 {
 	struct sun4i_mdio_data *data = bus->priv;
-	unsigned long start_jiffies;
+	unsigned long timeout_jiffies;
 
 	/* issue the phy address and reg */
 	writel((mii_id << 8) | regnum, data->membase + EMAC_MAC_MADR_REG);
@@ -77,10 +76,9 @@
 	writel(0x1, data->membase + EMAC_MAC_MCMD_REG);
 
 	/* Wait read complete */
-	start_jiffies = jiffies;
+	timeout_jiffies = jiffies + MDIO_TIMEOUT;
 	while (readl(data->membase + EMAC_MAC_MIND_REG) & 0x1) {
-		if (time_after(start_jiffies,
-			       start_jiffies + MDIO_TIMEOUT))
+		if (time_is_before_jiffies(timeout_jiffies))
 			return -ETIMEDOUT;
 		msleep(1);
 	}
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 8e7af83..138de83 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -23,7 +23,7 @@
 #define RTL821x_INER_INIT	0x6400
 #define RTL821x_INSR		0x13
 
-#define	RTL8211E_INER_LINK_STAT	0x10
+#define	RTL8211E_INER_LINK_STATUS	0x400
 
 MODULE_DESCRIPTION("Realtek PHY driver");
 MODULE_AUTHOR("Johnson Leung");
@@ -57,7 +57,7 @@
 
 	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
 		err = phy_write(phydev, RTL821x_INER,
-				RTL8211E_INER_LINK_STAT);
+				RTL8211E_INER_LINK_STATUS);
 	else
 		err = phy_write(phydev, RTL821x_INER, 0);
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index db690a3..71af122 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1074,8 +1074,9 @@
 	u32 rxhash;
 
 	if (!(tun->flags & TUN_NO_PI)) {
-		if ((len -= sizeof(pi)) > total_len)
+		if (len < sizeof(pi))
 			return -EINVAL;
+		len -= sizeof(pi);
 
 		if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi)))
 			return -EFAULT;
@@ -1083,8 +1084,9 @@
 	}
 
 	if (tun->flags & TUN_VNET_HDR) {
-		if ((len -= tun->vnet_hdr_sz) > total_len)
+		if (len < tun->vnet_hdr_sz)
 			return -EINVAL;
+		len -= tun->vnet_hdr_sz;
 
 		if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
 			return -EFAULT;
diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 1e3c302..2bc87e3 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -1029,10 +1029,10 @@
 	dev->mii.supports_gmii = 1;
 
 	dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-			      NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO;
+			      NETIF_F_RXCSUM;
 
 	dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-				 NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO;
+				 NETIF_F_RXCSUM;
 
 	/* Enable checksum offload */
 	*tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP |
@@ -1173,7 +1173,6 @@
 	if (((skb->len + 8) % frame_size) == 0)
 		tx_hdr2 |= 0x80008000;	/* Enable padding */
 
-	skb_linearize(skb);
 	headroom = skb_headroom(skb);
 	tailroom = skb_tailroom(skb);
 
@@ -1317,10 +1316,10 @@
 			  1, 1, tmp);
 
 	dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-			      NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO;
+			      NETIF_F_RXCSUM;
 
 	dev->net->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-				 NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO;
+				 NETIF_F_RXCSUM;
 
 	/* Enable checksum offload */
 	*tmp = AX_RXCOE_IP | AX_RXCOE_TCP | AX_RXCOE_UDP |
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index cba1d46..86292e6 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -2816,13 +2816,16 @@
 static int hso_get_config_data(struct usb_interface *interface)
 {
 	struct usb_device *usbdev = interface_to_usbdev(interface);
-	u8 config_data[17];
+	u8 *config_data = kmalloc(17, GFP_KERNEL);
 	u32 if_num = interface->altsetting->desc.bInterfaceNumber;
 	s32 result;
 
+	if (!config_data)
+		return -ENOMEM;
 	if (usb_control_msg(usbdev, usb_rcvctrlpipe(usbdev, 0),
 			    0x86, 0xC0, 0, 0, config_data, 17,
 			    USB_CTRL_SET_TIMEOUT) != 0x11) {
+		kfree(config_data);
 		return -EIO;
 	}
 
@@ -2873,6 +2876,7 @@
 	if (config_data[16] & 0x1)
 		result |= HSO_INFO_CRC_BUG;
 
+	kfree(config_data);
 	return result;
 }
 
@@ -2886,6 +2890,11 @@
 	struct hso_shared_int *shared_int;
 	struct hso_device *tmp_dev = NULL;
 
+	if (interface->cur_altsetting->desc.bInterfaceClass != 0xFF) {
+		dev_err(&interface->dev, "Not our interface\n");
+		return -ENODEV;
+	}
+
 	if_num = interface->altsetting->desc.bInterfaceNumber;
 
 	/* Get the interface/port specification from either driver_info or from
@@ -2895,10 +2904,6 @@
 	else
 		port_spec = hso_get_config_data(interface);
 
-	if (interface->cur_altsetting->desc.bInterfaceClass != 0xFF) {
-		dev_err(&interface->dev, "Not our interface\n");
-		return -ENODEV;
-	}
 	/* Check if we need to switch to alt interfaces prior to port
 	 * configuration */
 	if (interface->num_altsetting > 1)
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index ee13f9e..11c51f2 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -344,17 +344,41 @@
 static
 int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
 {
-	return usb_control_msg(tp->udev, usb_rcvctrlpipe(tp->udev, 0),
+	int ret;
+	void *tmp;
+
+	tmp = kmalloc(size, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	ret = usb_control_msg(tp->udev, usb_rcvctrlpipe(tp->udev, 0),
 			       RTL8152_REQ_GET_REGS, RTL8152_REQT_READ,
-			       value, index, data, size, 500);
+			       value, index, tmp, size, 500);
+
+	memcpy(data, tmp, size);
+	kfree(tmp);
+
+	return ret;
 }
 
 static
 int set_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
 {
-	return usb_control_msg(tp->udev, usb_sndctrlpipe(tp->udev, 0),
+	int ret;
+	void *tmp;
+
+	tmp = kmalloc(size, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	memcpy(tmp, data, size);
+
+	ret = usb_control_msg(tp->udev, usb_sndctrlpipe(tp->udev, 0),
 			       RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE,
-			       value, index, data, size, 500);
+			       value, index, tmp, size, 500);
+
+	kfree(tmp);
+	return ret;
 }
 
 static int generic_ocp_read(struct r8152 *tp, u16 index, u16 size,
@@ -490,37 +514,31 @@
 
 static u32 ocp_read_dword(struct r8152 *tp, u16 type, u16 index)
 {
-	u32 data;
+	__le32 data;
 
-	if (type == MCU_TYPE_PLA)
-		pla_ocp_read(tp, index, sizeof(data), &data);
-	else
-		usb_ocp_read(tp, index, sizeof(data), &data);
+	generic_ocp_read(tp, index, sizeof(data), &data, type);
 
 	return __le32_to_cpu(data);
 }
 
 static void ocp_write_dword(struct r8152 *tp, u16 type, u16 index, u32 data)
 {
-	if (type == MCU_TYPE_PLA)
-		pla_ocp_write(tp, index, BYTE_EN_DWORD, sizeof(data), &data);
-	else
-		usb_ocp_write(tp, index, BYTE_EN_DWORD, sizeof(data), &data);
+	__le32 tmp = __cpu_to_le32(data);
+
+	generic_ocp_write(tp, index, BYTE_EN_DWORD, sizeof(tmp), &tmp, type);
 }
 
 static u16 ocp_read_word(struct r8152 *tp, u16 type, u16 index)
 {
 	u32 data;
+	__le32 tmp;
 	u8 shift = index & 2;
 
 	index &= ~3;
 
-	if (type == MCU_TYPE_PLA)
-		pla_ocp_read(tp, index, sizeof(data), &data);
-	else
-		usb_ocp_read(tp, index, sizeof(data), &data);
+	generic_ocp_read(tp, index, sizeof(tmp), &tmp, type);
 
-	data = __le32_to_cpu(data);
+	data = __le32_to_cpu(tmp);
 	data >>= (shift * 8);
 	data &= 0xffff;
 
@@ -529,7 +547,8 @@
 
 static void ocp_write_word(struct r8152 *tp, u16 type, u16 index, u32 data)
 {
-	u32 tmp, mask = 0xffff;
+	u32 mask = 0xffff;
+	__le32 tmp;
 	u16 byen = BYTE_EN_WORD;
 	u8 shift = index & 2;
 
@@ -542,34 +561,25 @@
 		index &= ~3;
 	}
 
-	if (type == MCU_TYPE_PLA)
-		pla_ocp_read(tp, index, sizeof(tmp), &tmp);
-	else
-		usb_ocp_read(tp, index, sizeof(tmp), &tmp);
+	generic_ocp_read(tp, index, sizeof(tmp), &tmp, type);
 
-	tmp = __le32_to_cpu(tmp) & ~mask;
-	tmp |= data;
-	tmp = __cpu_to_le32(tmp);
+	data |= __le32_to_cpu(tmp) & ~mask;
+	tmp = __cpu_to_le32(data);
 
-	if (type == MCU_TYPE_PLA)
-		pla_ocp_write(tp, index, byen, sizeof(tmp), &tmp);
-	else
-		usb_ocp_write(tp, index, byen, sizeof(tmp), &tmp);
+	generic_ocp_write(tp, index, byen, sizeof(tmp), &tmp, type);
 }
 
 static u8 ocp_read_byte(struct r8152 *tp, u16 type, u16 index)
 {
 	u32 data;
+	__le32 tmp;
 	u8 shift = index & 3;
 
 	index &= ~3;
 
-	if (type == MCU_TYPE_PLA)
-		pla_ocp_read(tp, index, sizeof(data), &data);
-	else
-		usb_ocp_read(tp, index, sizeof(data), &data);
+	generic_ocp_read(tp, index, sizeof(tmp), &tmp, type);
 
-	data = __le32_to_cpu(data);
+	data = __le32_to_cpu(tmp);
 	data >>= (shift * 8);
 	data &= 0xff;
 
@@ -578,7 +588,8 @@
 
 static void ocp_write_byte(struct r8152 *tp, u16 type, u16 index, u32 data)
 {
-	u32 tmp, mask = 0xff;
+	u32 mask = 0xff;
+	__le32 tmp;
 	u16 byen = BYTE_EN_BYTE;
 	u8 shift = index & 3;
 
@@ -591,19 +602,12 @@
 		index &= ~3;
 	}
 
-	if (type == MCU_TYPE_PLA)
-		pla_ocp_read(tp, index, sizeof(tmp), &tmp);
-	else
-		usb_ocp_read(tp, index, sizeof(tmp), &tmp);
+	generic_ocp_read(tp, index, sizeof(tmp), &tmp, type);
 
-	tmp = __le32_to_cpu(tmp) & ~mask;
-	tmp |= data;
-	tmp = __cpu_to_le32(tmp);
+	data |= __le32_to_cpu(tmp) & ~mask;
+	tmp = __cpu_to_le32(data);
 
-	if (type == MCU_TYPE_PLA)
-		pla_ocp_write(tp, index, byen, sizeof(tmp), &tmp);
-	else
-		usb_ocp_write(tp, index, byen, sizeof(tmp), &tmp);
+	generic_ocp_write(tp, index, byen, sizeof(tmp), &tmp, type);
 }
 
 static void r8152_mdio_write(struct r8152 *tp, u32 reg_addr, u32 value)
@@ -685,21 +689,14 @@
 static inline void set_ethernet_addr(struct r8152 *tp)
 {
 	struct net_device *dev = tp->netdev;
-	u8 *node_id;
+	u8 node_id[8] = {0};
 
-	node_id = kmalloc(sizeof(u8) * 8, GFP_KERNEL);
-	if (!node_id) {
-		netif_err(tp, probe, dev, "out of memory");
-		return;
-	}
-
-	if (pla_ocp_read(tp, PLA_IDR, sizeof(u8) * 8, node_id) < 0)
+	if (pla_ocp_read(tp, PLA_IDR, sizeof(node_id), node_id) < 0)
 		netif_notice(tp, probe, dev, "inet addr fail\n");
 	else {
 		memcpy(dev->dev_addr, node_id, dev->addr_len);
 		memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
 	}
-	kfree(node_id);
 }
 
 static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
@@ -882,15 +879,10 @@
 static void _rtl8152_set_rx_mode(struct net_device *netdev)
 {
 	struct r8152 *tp = netdev_priv(netdev);
-	u32 tmp, *mc_filter;	/* Multicast hash filter */
+	u32 mc_filter[2];	/* Multicast hash filter */
+	__le32 tmp[2];
 	u32 ocp_data;
 
-	mc_filter = kmalloc(sizeof(u32) * 2, GFP_KERNEL);
-	if (!mc_filter) {
-		netif_err(tp, link, netdev, "out of memory");
-		return;
-	}
-
 	clear_bit(RTL8152_SET_RX_MODE, &tp->flags);
 	netif_stop_queue(netdev);
 	ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
@@ -918,14 +910,12 @@
 		}
 	}
 
-	tmp = mc_filter[0];
-	mc_filter[0] = __cpu_to_le32(swab32(mc_filter[1]));
-	mc_filter[1] = __cpu_to_le32(swab32(tmp));
+	tmp[0] = __cpu_to_le32(swab32(mc_filter[1]));
+	tmp[1] = __cpu_to_le32(swab32(mc_filter[0]));
 
-	pla_ocp_write(tp, PLA_MAR, BYTE_EN_DWORD, sizeof(u32) * 2, mc_filter);
+	pla_ocp_write(tp, PLA_MAR, BYTE_EN_DWORD, sizeof(tmp), tmp);
 	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
 	netif_wake_queue(netdev);
-	kfree(mc_filter);
 }
 
 static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
diff --git a/drivers/net/usb/r815x.c b/drivers/net/usb/r815x.c
index 8523922..2df2f4f 100644
--- a/drivers/net/usb/r815x.c
+++ b/drivers/net/usb/r815x.c
@@ -24,34 +24,43 @@
 
 static int pla_read_word(struct usb_device *udev, u16 index)
 {
-	int data, ret;
+	int ret;
 	u8 shift = index & 2;
-	__le32 ocp_data;
+	__le32 *tmp;
+
+	tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
 
 	index &= ~3;
 
 	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
 			      RTL815x_REQ_GET_REGS, RTL815x_REQT_READ,
-			      index, MCU_TYPE_PLA, &ocp_data, sizeof(ocp_data),
-			      500);
+			      index, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500);
 	if (ret < 0)
-		return ret;
+		goto out2;
 
-	data = __le32_to_cpu(ocp_data);
-	data >>= (shift * 8);
-	data &= 0xffff;
+	ret = __le32_to_cpu(*tmp);
+	ret >>= (shift * 8);
+	ret &= 0xffff;
 
-	return data;
+out2:
+	kfree(tmp);
+	return ret;
 }
 
 static int pla_write_word(struct usb_device *udev, u16 index, u32 data)
 {
-	__le32 ocp_data;
+	__le32 *tmp;
 	u32 mask = 0xffff;
 	u16 byen = BYTE_EN_WORD;
 	u8 shift = index & 2;
 	int ret;
 
+	tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
 	data &= mask;
 
 	if (shift) {
@@ -63,19 +72,20 @@
 
 	ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0),
 			      RTL815x_REQ_GET_REGS, RTL815x_REQT_READ,
-			      index, MCU_TYPE_PLA, &ocp_data, sizeof(ocp_data),
-			      500);
+			      index, MCU_TYPE_PLA, tmp, sizeof(*tmp), 500);
 	if (ret < 0)
-		return ret;
+		goto out3;
 
-	data |= __le32_to_cpu(ocp_data) & ~mask;
-	ocp_data = __cpu_to_le32(data);
+	data |= __le32_to_cpu(*tmp) & ~mask;
+	*tmp = __cpu_to_le32(data);
 
 	ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
 			      RTL815x_REQ_SET_REGS, RTL815x_REQT_WRITE,
-			      index, MCU_TYPE_PLA | byen, &ocp_data,
-			      sizeof(ocp_data), 500);
+			      index, MCU_TYPE_PLA | byen, tmp, sizeof(*tmp),
+			      500);
 
+out3:
+	kfree(tmp);
 	return ret;
 }
 
@@ -116,11 +126,18 @@
 static int r815x_mdio_read(struct net_device *netdev, int phy_id, int reg)
 {
 	struct usbnet *dev = netdev_priv(netdev);
+	int ret;
 
 	if (phy_id != R815x_PHY_ID)
 		return -EINVAL;
 
-	return ocp_reg_read(dev, BASE_MII + reg * 2);
+	if (usb_autopm_get_interface(dev->intf) < 0)
+		return -ENODEV;
+
+	ret = ocp_reg_read(dev, BASE_MII + reg * 2);
+
+	usb_autopm_put_interface(dev->intf);
+	return ret;
 }
 
 static
@@ -131,7 +148,12 @@
 	if (phy_id != R815x_PHY_ID)
 		return;
 
+	if (usb_autopm_get_interface(dev->intf) < 0)
+		return;
+
 	ocp_reg_write(dev, BASE_MII + reg * 2, val);
+
+	usb_autopm_put_interface(dev->intf);
 }
 
 static int r8153_bind(struct usbnet *dev, struct usb_interface *intf)
@@ -150,7 +172,7 @@
 	dev->mii.phy_id = R815x_PHY_ID;
 	dev->mii.supports_gmii = 1;
 
-	return 0;
+	return status;
 }
 
 static int r8152_bind(struct usbnet *dev, struct usb_interface *intf)
@@ -169,7 +191,7 @@
 	dev->mii.phy_id = R815x_PHY_ID;
 	dev->mii.supports_gmii = 0;
 
-	return 0;
+	return status;
 }
 
 static const struct driver_info r8152_info = {
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 75409748c..66ebbac 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -45,7 +45,6 @@
 #define EEPROM_MAC_OFFSET		(0x01)
 #define DEFAULT_TX_CSUM_ENABLE		(true)
 #define DEFAULT_RX_CSUM_ENABLE		(true)
-#define DEFAULT_TSO_ENABLE		(true)
 #define SMSC75XX_INTERNAL_PHY_ID	(1)
 #define SMSC75XX_TX_OVERHEAD		(8)
 #define MAX_RX_FIFO_SIZE		(20 * 1024)
@@ -1410,17 +1409,14 @@
 
 	INIT_WORK(&pdata->set_multicast, smsc75xx_deferred_multicast_write);
 
-	if (DEFAULT_TX_CSUM_ENABLE) {
+	if (DEFAULT_TX_CSUM_ENABLE)
 		dev->net->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-		if (DEFAULT_TSO_ENABLE)
-			dev->net->features |= NETIF_F_SG |
-				NETIF_F_TSO | NETIF_F_TSO6;
-	}
+
 	if (DEFAULT_RX_CSUM_ENABLE)
 		dev->net->features |= NETIF_F_RXCSUM;
 
 	dev->net->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-		NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXCSUM;
+				NETIF_F_RXCSUM;
 
 	ret = smsc75xx_wait_ready(dev, 0);
 	if (ret < 0) {
@@ -2200,8 +2196,6 @@
 {
 	u32 tx_cmd_a, tx_cmd_b;
 
-	skb_linearize(skb);
-
 	if (skb_headroom(skb) < SMSC75XX_TX_OVERHEAD) {
 		struct sk_buff *skb2 =
 			skb_copy_expand(skb, SMSC75XX_TX_OVERHEAD, 0, flags);
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index da86652..eee1f19 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -269,6 +269,7 @@
 	dev->ethtool_ops = &veth_ethtool_ops;
 	dev->features |= NETIF_F_LLTX;
 	dev->features |= VETH_FEATURES;
+	dev->vlan_features = dev->features;
 	dev->destructor = veth_dev_free;
 
 	dev->hw_features = VETH_FEATURES;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a5ba8dd..767f7af 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -136,7 +136,8 @@
 	u32		  flags;	/* VXLAN_F_* below */
 
 	struct work_struct sock_work;
-	struct work_struct igmp_work;
+	struct work_struct igmp_join;
+	struct work_struct igmp_leave;
 
 	unsigned long	  age_interval;
 	struct timer_list age_timer;
@@ -736,7 +737,6 @@
 	return false;
 }
 
-
 /* See if multicast group is already in use by other ID */
 static bool vxlan_group_used(struct vxlan_net *vn, __be32 remote_ip)
 {
@@ -770,12 +770,13 @@
 	queue_work(vxlan_wq, &vs->del_work);
 }
 
-/* Callback to update multicast group membership.
- * Scheduled when vxlan goes up/down.
+/* Callback to update multicast group membership when first VNI on
+ * multicast asddress is brought up
+ * Done as workqueue because ip_mc_join_group acquires RTNL.
  */
-static void vxlan_igmp_work(struct work_struct *work)
+static void vxlan_igmp_join(struct work_struct *work)
 {
-	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_work);
+	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
 	struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
 	struct vxlan_sock *vs = vxlan->vn_sock;
 	struct sock *sk = vs->sock->sk;
@@ -785,10 +786,27 @@
 	};
 
 	lock_sock(sk);
-	if (vxlan_group_used(vn, vxlan->default_dst.remote_ip))
-		ip_mc_join_group(sk, &mreq);
-	else
-		ip_mc_leave_group(sk, &mreq);
+	ip_mc_join_group(sk, &mreq);
+	release_sock(sk);
+
+	vxlan_sock_release(vn, vs);
+	dev_put(vxlan->dev);
+}
+
+/* Inverse of vxlan_igmp_join when last VNI is brought down */
+static void vxlan_igmp_leave(struct work_struct *work)
+{
+	struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
+	struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
+	struct vxlan_sock *vs = vxlan->vn_sock;
+	struct sock *sk = vs->sock->sk;
+	struct ip_mreqn mreq = {
+		.imr_multiaddr.s_addr	= vxlan->default_dst.remote_ip,
+		.imr_ifindex		= vxlan->default_dst.remote_ifindex,
+	};
+
+	lock_sock(sk);
+	ip_mc_leave_group(sk, &mreq);
 	release_sock(sk);
 
 	vxlan_sock_release(vn, vs);
@@ -1359,6 +1377,7 @@
 /* Start ageing timer and join group when device is brought up */
 static int vxlan_open(struct net_device *dev)
 {
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_sock *vs = vxlan->vn_sock;
 
@@ -1366,10 +1385,11 @@
 	if (!vs)
 		return -ENOTCONN;
 
-	if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
+	if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)) &&
+	    vxlan_group_used(vn, vxlan->default_dst.remote_ip)) {
 		vxlan_sock_hold(vs);
 		dev_hold(dev);
-		queue_work(vxlan_wq, &vxlan->igmp_work);
+		queue_work(vxlan_wq, &vxlan->igmp_join);
 	}
 
 	if (vxlan->age_interval)
@@ -1400,13 +1420,15 @@
 /* Cleanup timer and forwarding table on shutdown */
 static int vxlan_stop(struct net_device *dev)
 {
+	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	struct vxlan_sock *vs = vxlan->vn_sock;
 
-	if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
+	if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip)) &&
+	    ! vxlan_group_used(vn, vxlan->default_dst.remote_ip)) {
 		vxlan_sock_hold(vs);
 		dev_hold(dev);
-		queue_work(vxlan_wq, &vxlan->igmp_work);
+		queue_work(vxlan_wq, &vxlan->igmp_leave);
 	}
 
 	del_timer_sync(&vxlan->age_timer);
@@ -1471,7 +1493,8 @@
 
 	INIT_LIST_HEAD(&vxlan->next);
 	spin_lock_init(&vxlan->hash_lock);
-	INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work);
+	INIT_WORK(&vxlan->igmp_join, vxlan_igmp_join);
+	INIT_WORK(&vxlan->igmp_leave, vxlan_igmp_leave);
 	INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
 
 	init_timer_deferrable(&vxlan->age_timer);
@@ -1770,8 +1793,6 @@
 	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 
-	flush_workqueue(vxlan_wq);
-
 	spin_lock(&vn->sock_lock);
 	hlist_del_rcu(&vxlan->hlist);
 	spin_unlock(&vn->sock_lock);
@@ -1878,10 +1899,12 @@
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
 	struct vxlan_dev *vxlan;
+	LIST_HEAD(list);
 
 	rtnl_lock();
 	list_for_each_entry(vxlan, &vn->vxlan_list, next)
-		dev_close(vxlan->dev);
+		unregister_netdevice_queue(vxlan->dev, &list);
+	unregister_netdevice_many(&list);
 	rtnl_unlock();
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig
index cde58fe..82e8088 100644
--- a/drivers/net/wireless/ath/ath10k/Kconfig
+++ b/drivers/net/wireless/ath/ath10k/Kconfig
@@ -1,6 +1,6 @@
 config ATH10K
         tristate "Atheros 802.11ac wireless cards support"
-        depends on MAC80211
+        depends on MAC80211 && HAS_DMA
 	select ATH_COMMON
         ---help---
           This module adds support for wireless adapters based on
diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
index 81b686c..40825d4 100644
--- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c
+++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c
@@ -325,7 +325,7 @@
 	struct netdev_hw_addr *ha;
 
 	mfilt[0] = 0;
-	mfilt[1] = 1;
+	mfilt[1] = 0;
 
 	netdev_hw_addr_list_for_each(ha, mc_list) {
 		/* calculate XOR of eight 6-bit values */
diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
index d1acfe9..1576d58 100644
--- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c
+++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c
@@ -610,7 +610,15 @@
 	REG_SET_BIT(ah, AR_DIAG_SW, (AR_DIAG_RX_DIS | AR_DIAG_RX_ABORT));
 
 	if (AR_SREV_9280_20_OR_LATER(ah)) {
-		val = REG_READ(ah, AR_PCU_MISC_MODE2);
+		/*
+		 * For AR9280 and above, there is a new feature that allows
+		 * Multicast search based on both MAC Address and Key ID.
+		 * By default, this feature is enabled. But since the driver
+		 * is not using this feature, we switch it off; otherwise
+		 * multicast search based on MAC addr only will fail.
+		 */
+		val = REG_READ(ah, AR_PCU_MISC_MODE2) &
+			(~AR_ADHOC_MCAST_KEYID_ENABLE);
 
 		if (!AR_SREV_9271(ah))
 			val &= ~AR_PCU_MISC_MODE2_HWWAR1;
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 9e582e1..5205a36 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -1082,7 +1082,7 @@
 	struct device *dev = &hif_dev->udev->dev;
 	struct device *parent = dev->parent;
 
-	complete(&hif_dev->fw_done);
+	complete_all(&hif_dev->fw_done);
 
 	if (parent)
 		device_lock(parent);
@@ -1131,7 +1131,7 @@
 
 	release_firmware(fw);
 	hif_dev->flags |= HIF_USB_READY;
-	complete(&hif_dev->fw_done);
+	complete_all(&hif_dev->fw_done);
 
 	return;
 
@@ -1295,7 +1295,9 @@
 
 	usb_set_intfdata(interface, NULL);
 
-	if (!unplugged && (hif_dev->flags & HIF_USB_START))
+	/* If firmware was loaded we should drop it
+	 * go back to first stage bootloader. */
+	if (!unplugged && (hif_dev->flags & HIF_USB_READY))
 		ath9k_hif_usb_reboot(udev);
 
 	kfree(hif_dev);
@@ -1316,7 +1318,10 @@
 	if (!(hif_dev->flags & HIF_USB_START))
 		ath9k_htc_suspend(hif_dev->htc_handle);
 
-	ath9k_hif_usb_dealloc_urbs(hif_dev);
+	wait_for_completion(&hif_dev->fw_done);
+
+	if (hif_dev->flags & HIF_USB_READY)
+		ath9k_hif_usb_dealloc_urbs(hif_dev);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index 71a183f..c3676bf 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -861,6 +861,7 @@
 	if (error != 0)
 		goto err_rx;
 
+	ath9k_hw_disable(priv->ah);
 #ifdef CONFIG_MAC80211_LEDS
 	/* must be initialized before ieee80211_register_hw */
 	priv->led_cdev.default_trigger = ieee80211_create_tpt_led_trigger(priv->hw,
diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c
index c59ae43..9279927 100644
--- a/drivers/net/wireless/ath/ath9k/xmit.c
+++ b/drivers/net/wireless/ath/ath9k/xmit.c
@@ -146,6 +146,28 @@
 			       ARRAY_SIZE(bf->rates));
 }
 
+static void ath_txq_skb_done(struct ath_softc *sc, struct ath_txq *txq,
+			     struct sk_buff *skb)
+{
+	int q;
+
+	q = skb_get_queue_mapping(skb);
+	if (txq == sc->tx.uapsdq)
+		txq = sc->tx.txq_map[q];
+
+	if (txq != sc->tx.txq_map[q])
+		return;
+
+	if (WARN_ON(--txq->pending_frames < 0))
+		txq->pending_frames = 0;
+
+	if (txq->stopped &&
+	    txq->pending_frames < sc->tx.txq_max_pending[q]) {
+		ieee80211_wake_queue(sc->hw, q);
+		txq->stopped = false;
+	}
+}
+
 static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid)
 {
 	struct ath_txq *txq = tid->ac->txq;
@@ -167,6 +189,7 @@
 		if (!bf) {
 			bf = ath_tx_setup_buffer(sc, txq, tid, skb);
 			if (!bf) {
+				ath_txq_skb_done(sc, txq, skb);
 				ieee80211_free_txskb(sc->hw, skb);
 				continue;
 			}
@@ -811,6 +834,7 @@
 
 		if (!bf) {
 			__skb_unlink(skb, &tid->buf_q);
+			ath_txq_skb_done(sc, txq, skb);
 			ieee80211_free_txskb(sc->hw, skb);
 			continue;
 		}
@@ -1824,6 +1848,7 @@
 
 	bf = ath_tx_setup_buffer(sc, txq, tid, skb);
 	if (!bf) {
+		ath_txq_skb_done(sc, txq, skb);
 		ieee80211_free_txskb(sc->hw, skb);
 		return;
 	}
@@ -2090,6 +2115,7 @@
 
 	bf = ath_tx_setup_buffer(sc, txq, tid, skb);
 	if (!bf) {
+		ath_txq_skb_done(sc, txq, skb);
 		if (txctl->paprd)
 			dev_kfree_skb_any(skb);
 		else
@@ -2189,7 +2215,7 @@
 	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
 	struct ath_common *common = ath9k_hw_common(sc->sc_ah);
 	struct ieee80211_hdr * hdr = (struct ieee80211_hdr *)skb->data;
-	int q, padpos, padsize;
+	int padpos, padsize;
 	unsigned long flags;
 
 	ath_dbg(common, XMIT, "TX complete: skb: %p\n", skb);
@@ -2225,21 +2251,7 @@
 	spin_unlock_irqrestore(&sc->sc_pm_lock, flags);
 
 	__skb_queue_tail(&txq->complete_q, skb);
-
-	q = skb_get_queue_mapping(skb);
-	if (txq == sc->tx.uapsdq)
-		txq = sc->tx.txq_map[q];
-
-	if (txq == sc->tx.txq_map[q]) {
-		if (WARN_ON(--txq->pending_frames < 0))
-			txq->pending_frames = 0;
-
-		if (txq->stopped &&
-		    txq->pending_frames < sc->tx.txq_max_pending[q]) {
-			ieee80211_wake_queue(sc->hw, q);
-			txq->stopped = false;
-		}
-	}
+	ath_txq_skb_done(sc, txq, skb);
 }
 
 static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c
index e8308ec..ab63676 100644
--- a/drivers/net/wireless/ath/wil6210/debugfs.c
+++ b/drivers/net/wireless/ath/wil6210/debugfs.c
@@ -145,7 +145,7 @@
 				   le16_to_cpu(hdr.type), hdr.flags);
 			if (len <= MAX_MBOXITEM_SIZE) {
 				int n = 0;
-				unsigned char printbuf[16 * 3 + 2];
+				char printbuf[16 * 3 + 2];
 				unsigned char databuf[MAX_MBOXITEM_SIZE];
 				void __iomem *src = wmi_buffer(wil, d.addr) +
 					sizeof(struct wil6210_mbox_hdr);
@@ -416,7 +416,7 @@
 		seq_printf(s, "  SKB = %p\n", skb);
 
 		if (skb) {
-			unsigned char printbuf[16 * 3 + 2];
+			char printbuf[16 * 3 + 2];
 			int i = 0;
 			int len = le16_to_cpu(d->dma.length);
 			void *p = skb->data;
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
index 8e89755..8009901 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/dhd_linux.c
@@ -242,7 +242,7 @@
 {
 	unsigned long flags;
 
-	if (!ifp)
+	if (!ifp || !ifp->ndev)
 		return;
 
 	brcmf_dbg(TRACE, "enter: idx=%d stop=0x%X reason=%d state=%d\n",
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/brcm80211/brcmfmac/fwsignal.c
index f0d9f7f..29b1f24 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/fwsignal.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/fwsignal.c
@@ -1744,13 +1744,14 @@
 	ulong flags;
 	int fifo = BRCMF_FWS_FIFO_BCMC;
 	bool multicast = is_multicast_ether_addr(eh->h_dest);
+	bool pae = eh->h_proto == htons(ETH_P_PAE);
 
 	/* determine the priority */
 	if (!skb->priority)
 		skb->priority = cfg80211_classify8021d(skb);
 
 	drvr->tx_multicast += !!multicast;
-	if (ntohs(eh->h_proto) == ETH_P_PAE)
+	if (pae)
 		atomic_inc(&ifp->pend_8021x_cnt);
 
 	if (!brcmf_fws_fc_active(fws)) {
@@ -1781,6 +1782,11 @@
 		brcmf_fws_schedule_deq(fws);
 	} else {
 		brcmf_err("drop skb: no hanger slot\n");
+		if (pae) {
+			atomic_dec(&ifp->pend_8021x_cnt);
+			if (waitqueue_active(&ifp->pend_8021x_wait))
+				wake_up(&ifp->pend_8021x_wait);
+		}
 		brcmu_pkt_buf_free_skb(skb);
 	}
 	brcmf_fws_unlock(drvr, flags);
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c
index 277b37a..7fa71f7 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c
@@ -1093,8 +1093,11 @@
 		brcmf_dbg(INFO, "Call WLC_DISASSOC to stop excess roaming\n ");
 		err = brcmf_fil_cmd_data_set(vif->ifp,
 					     BRCMF_C_DISASSOC, NULL, 0);
-		if (err)
+		if (err) {
 			brcmf_err("WLC_DISASSOC failed (%d)\n", err);
+			cfg80211_disconnected(vif->wdev.netdev, 0,
+					      NULL, 0, GFP_KERNEL);
+		}
 		clear_bit(BRCMF_VIF_STATUS_CONNECTED, &vif->sme_state);
 	}
 	clear_bit(BRCMF_VIF_STATUS_CONNECTING, &vif->sme_state);
diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c
index 7365674..010b252 100644
--- a/drivers/net/wireless/cw1200/sta.c
+++ b/drivers/net/wireless/cw1200/sta.c
@@ -1406,11 +1406,8 @@
 	if (!priv->join_status)
 		goto done;
 
-	if (priv->join_status > CW1200_JOIN_STATUS_IBSS) {
-		wiphy_err(priv->hw->wiphy, "Unexpected: join status: %d\n",
-			  priv->join_status);
-		BUG_ON(1);
-	}
+	if (priv->join_status == CW1200_JOIN_STATUS_AP)
+		goto done;
 
 	cancel_work_sync(&priv->update_filtering_work);
 	cancel_work_sync(&priv->set_beacon_wakeup_period_work);
diff --git a/drivers/net/wireless/cw1200/txrx.c b/drivers/net/wireless/cw1200/txrx.c
index 5862c37..e824d4d 100644
--- a/drivers/net/wireless/cw1200/txrx.c
+++ b/drivers/net/wireless/cw1200/txrx.c
@@ -1165,7 +1165,7 @@
 		if (cw1200_handle_action_rx(priv, skb))
 			return;
 	} else if (ieee80211_is_beacon(frame->frame_control) &&
-		   !arg->status &&
+		   !arg->status && priv->vif &&
 		   !memcmp(ieee80211_get_SA(frame), priv->vif->bss_conf.bssid,
 			   ETH_ALEN)) {
 		const u8 *tim_ie;
diff --git a/drivers/net/wireless/hostap/hostap_ioctl.c b/drivers/net/wireless/hostap/hostap_ioctl.c
index ac07473..e509030 100644
--- a/drivers/net/wireless/hostap/hostap_ioctl.c
+++ b/drivers/net/wireless/hostap/hostap_ioctl.c
@@ -523,9 +523,9 @@
 
 	data->length = prism2_ap_get_sta_qual(local, addr, qual, IW_MAX_AP, 1);
 
-	memcpy(extra, &addr, sizeof(struct sockaddr) * data->length);
+	memcpy(extra, addr, sizeof(struct sockaddr) * data->length);
 	data->flags = 1; /* has quality information */
-	memcpy(extra + sizeof(struct sockaddr) * data->length, &qual,
+	memcpy(extra + sizeof(struct sockaddr) * data->length, qual,
 	       sizeof(struct iw_quality) * data->length);
 
 	kfree(addr);
diff --git a/drivers/net/wireless/iwlegacy/4965-mac.c b/drivers/net/wireless/iwlegacy/4965-mac.c
index b9b2bb5..f2ed62e 100644
--- a/drivers/net/wireless/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/iwlegacy/4965-mac.c
@@ -4460,12 +4460,12 @@
 		 * is killed. Hence update the killswitch state here. The
 		 * rfkill handler will care about restarting if needed.
 		 */
-		if (!test_bit(S_ALIVE, &il->status)) {
-			if (hw_rf_kill)
-				set_bit(S_RFKILL, &il->status);
-			else
-				clear_bit(S_RFKILL, &il->status);
+		if (hw_rf_kill) {
+			set_bit(S_RFKILL, &il->status);
+		} else {
+			clear_bit(S_RFKILL, &il->status);
 			wiphy_rfkill_set_hw_state(il->hw->wiphy, hw_rf_kill);
+			il_force_reset(il, true);
 		}
 
 		handled |= CSR_INT_BIT_RF_KILL;
@@ -5334,6 +5334,9 @@
 
 	il->active_rate = RATES_MASK;
 
+	il_power_update_mode(il, true);
+	D_INFO("Updated power mode\n");
+
 	if (il_is_associated(il)) {
 		struct il_rxon_cmd *active_rxon =
 		    (struct il_rxon_cmd *)&il->active;
@@ -5364,9 +5367,6 @@
 	D_INFO("ALIVE processing complete.\n");
 	wake_up(&il->wait_command_queue);
 
-	il_power_update_mode(il, true);
-	D_INFO("Updated power mode\n");
-
 	return;
 
 restart:
diff --git a/drivers/net/wireless/iwlegacy/common.c b/drivers/net/wireless/iwlegacy/common.c
index 3195aad..b03e22e 100644
--- a/drivers/net/wireless/iwlegacy/common.c
+++ b/drivers/net/wireless/iwlegacy/common.c
@@ -4660,6 +4660,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(il_force_reset);
 
 int
 il_mac_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
index 822f1a0..3193872 100644
--- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c
@@ -1068,7 +1068,10 @@
 	if (test_bit(STATUS_EXIT_PENDING, &priv->status))
 		return;
 
-	if (test_and_clear_bit(STATUS_CHANNEL_SWITCH_PENDING, &priv->status))
+	if (!test_and_clear_bit(STATUS_CHANNEL_SWITCH_PENDING, &priv->status))
+		return;
+
+	if (ctx->vif)
 		ieee80211_chswitch_done(ctx->vif, is_success);
 }
 
diff --git a/drivers/net/wireless/iwlwifi/dvm/main.c b/drivers/net/wireless/iwlwifi/dvm/main.c
index 3952ddf..1531a4f 100644
--- a/drivers/net/wireless/iwlwifi/dvm/main.c
+++ b/drivers/net/wireless/iwlwifi/dvm/main.c
@@ -758,7 +758,7 @@
 					 BT_COEX_PRIO_TBL_EVT_INIT_CALIB2);
 		if (ret)
 			return ret;
-	} else {
+	} else if (priv->lib->bt_params) {
 		/*
 		 * default is 2-wire BT coexexistence support
 		 */
diff --git a/drivers/net/wireless/iwlwifi/mvm/d3.c b/drivers/net/wireless/iwlwifi/mvm/d3.c
index 7e5e5c2..83da884 100644
--- a/drivers/net/wireless/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/iwlwifi/mvm/d3.c
@@ -134,7 +134,7 @@
 	struct iwl_wowlan_rsc_tsc_params_cmd *rsc_tsc;
 	struct iwl_wowlan_tkip_params_cmd *tkip;
 	bool error, use_rsc_tsc, use_tkip;
-	int gtk_key_idx;
+	int wep_key_idx;
 };
 
 static void iwl_mvm_wowlan_program_keys(struct ieee80211_hw *hw,
@@ -188,8 +188,8 @@
 			wkc.wep_key.key_offset = 0;
 		} else {
 			/* others start at 1 */
-			data->gtk_key_idx++;
-			wkc.wep_key.key_offset = data->gtk_key_idx;
+			data->wep_key_idx++;
+			wkc.wep_key.key_offset = data->wep_key_idx;
 		}
 
 		ret = iwl_mvm_send_cmd_pdu(mvm, WEP_KEY, CMD_SYNC,
@@ -316,8 +316,13 @@
 		mvm->ptk_ivlen = key->iv_len;
 		mvm->ptk_icvlen = key->icv_len;
 	} else {
-		data->gtk_key_idx++;
-		key->hw_key_idx = data->gtk_key_idx;
+		/*
+		 * firmware only supports TSC/RSC for a single key,
+		 * so if there are multiple keep overwriting them
+		 * with new ones -- this relies on mac80211 doing
+		 * list_add_tail().
+		 */
+		key->hw_key_idx = 1;
 		mvm->gtk_ivlen = key->iv_len;
 		mvm->gtk_icvlen = key->icv_len;
 	}
diff --git a/drivers/net/wireless/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/iwlwifi/mvm/debugfs.c
index e56ed2a..c24a744 100644
--- a/drivers/net/wireless/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/iwlwifi/mvm/debugfs.c
@@ -988,7 +988,11 @@
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 	char buf[100];
 
-	if (!dbgfs_dir)
+	/*
+	 * Check if debugfs directory already exist before creating it.
+	 * This may happen when, for example, resetting hw or suspend-resume
+	 */
+	if (!dbgfs_dir || mvmvif->dbgfs_dir)
 		return;
 
 	mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir);
diff --git a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h
index b60d141..365095a 100644
--- a/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h
+++ b/drivers/net/wireless/iwlwifi/mvm/fw-api-scan.h
@@ -69,7 +69,6 @@
 /* Scan Commands, Responses, Notifications */
 
 /* Masks for iwl_scan_channel.type flags */
-#define SCAN_CHANNEL_TYPE_PASSIVE	0
 #define SCAN_CHANNEL_TYPE_ACTIVE	BIT(0)
 #define SCAN_CHANNEL_NARROW_BAND	BIT(22)
 
diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index e08683b..f19baf0 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -257,7 +257,11 @@
 	if (ret)
 		return ret;
 
-	return ieee80211_register_hw(mvm->hw);
+	ret = ieee80211_register_hw(mvm->hw);
+	if (ret)
+		iwl_mvm_leds_exit(mvm);
+
+	return ret;
 }
 
 static void iwl_mvm_mac_tx(struct ieee80211_hw *hw,
@@ -385,6 +389,7 @@
 	ieee80211_wake_queues(mvm->hw);
 
 	mvm->vif_count = 0;
+	mvm->rx_ba_sessions = 0;
 }
 
 static int iwl_mvm_mac_start(struct ieee80211_hw *hw)
@@ -507,6 +512,27 @@
 		goto out_unlock;
 
 	/*
+	 * TODO: remove this temporary code.
+	 * Currently MVM FW supports power management only on single MAC.
+	 * If new interface added, disable PM on existing interface.
+	 * P2P device is a special case, since it is handled by FW similary to
+	 * scan. If P2P deviced is added, PM remains enabled on existing
+	 * interface.
+	 * Note: the method below does not count the new interface being added
+	 * at this moment.
+	 */
+	if (vif->type != NL80211_IFTYPE_P2P_DEVICE)
+		mvm->vif_count++;
+	if (mvm->vif_count > 1) {
+		IWL_DEBUG_MAC80211(mvm,
+				   "Disable power on existing interfaces\n");
+		ieee80211_iterate_active_interfaces_atomic(
+					    mvm->hw,
+					    IEEE80211_IFACE_ITER_NORMAL,
+					    iwl_mvm_pm_disable_iterator, mvm);
+	}
+
+	/*
 	 * The AP binding flow can be done only after the beacon
 	 * template is configured (which happens only in the mac80211
 	 * start_ap() flow), and adding the broadcast station can happen
@@ -529,27 +555,6 @@
 		goto out_unlock;
 	}
 
-	/*
-	 * TODO: remove this temporary code.
-	 * Currently MVM FW supports power management only on single MAC.
-	 * If new interface added, disable PM on existing interface.
-	 * P2P device is a special case, since it is handled by FW similary to
-	 * scan. If P2P deviced is added, PM remains enabled on existing
-	 * interface.
-	 * Note: the method below does not count the new interface being added
-	 * at this moment.
-	 */
-	if (vif->type != NL80211_IFTYPE_P2P_DEVICE)
-		mvm->vif_count++;
-	if (mvm->vif_count > 1) {
-		IWL_DEBUG_MAC80211(mvm,
-				   "Disable power on existing interfaces\n");
-		ieee80211_iterate_active_interfaces_atomic(
-					    mvm->hw,
-					    IEEE80211_IFACE_ITER_NORMAL,
-					    iwl_mvm_pm_disable_iterator, mvm);
-	}
-
 	ret = iwl_mvm_mac_ctxt_add(mvm, vif);
 	if (ret)
 		goto out_release;
@@ -1006,6 +1011,21 @@
 	mutex_lock(&mvm->mutex);
 	if (old_state == IEEE80211_STA_NOTEXIST &&
 	    new_state == IEEE80211_STA_NONE) {
+		/*
+		 * Firmware bug - it'll crash if the beacon interval is less
+		 * than 16. We can't avoid connecting at all, so refuse the
+		 * station state change, this will cause mac80211 to abandon
+		 * attempts to connect to this AP, and eventually wpa_s will
+		 * blacklist the AP...
+		 */
+		if (vif->type == NL80211_IFTYPE_STATION &&
+		    vif->bss_conf.beacon_int < 16) {
+			IWL_ERR(mvm,
+				"AP %pM beacon interval is %d, refusing due to firmware bug!\n",
+				sta->addr, vif->bss_conf.beacon_int);
+			ret = -EINVAL;
+			goto out_unlock;
+		}
 		ret = iwl_mvm_add_sta(mvm, vif, sta);
 	} else if (old_state == IEEE80211_STA_NONE &&
 		   new_state == IEEE80211_STA_AUTH) {
@@ -1038,6 +1058,7 @@
 	} else {
 		ret = -EIO;
 	}
+ out_unlock:
 	mutex_unlock(&mvm->mutex);
 
 	return ret;
diff --git a/drivers/net/wireless/iwlwifi/mvm/mvm.h b/drivers/net/wireless/iwlwifi/mvm/mvm.h
index d40d7db..420e82d 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/iwlwifi/mvm/mvm.h
@@ -419,6 +419,7 @@
 	struct work_struct sta_drained_wk;
 	unsigned long sta_drained[BITS_TO_LONGS(IWL_MVM_STATION_COUNT)];
 	atomic_t pending_frames[IWL_MVM_STATION_COUNT];
+	u8 rx_ba_sessions;
 
 	/* configured by mac80211 */
 	u32 rts_threshold;
diff --git a/drivers/net/wireless/iwlwifi/mvm/scan.c b/drivers/net/wireless/iwlwifi/mvm/scan.c
index 2157b0f..acdff6b 100644
--- a/drivers/net/wireless/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/iwlwifi/mvm/scan.c
@@ -137,8 +137,8 @@
 {
 	int fw_idx, req_idx;
 
-	fw_idx = 0;
-	for (req_idx = req->n_ssids - 1; req_idx > 0; req_idx--) {
+	for (req_idx = req->n_ssids - 1, fw_idx = 0; req_idx > 0;
+	     req_idx--, fw_idx++) {
 		cmd->direct_scan[fw_idx].id = WLAN_EID_SSID;
 		cmd->direct_scan[fw_idx].len = req->ssids[req_idx].ssid_len;
 		memcpy(cmd->direct_scan[fw_idx].ssid,
@@ -153,7 +153,9 @@
  * just to notify that this scan is active and not passive.
  * In order to notify the FW of the number of SSIDs we wish to scan (including
  * the zero-length one), we need to set the corresponding bits in chan->type,
- * one for each SSID, and set the active bit (first).
+ * one for each SSID, and set the active bit (first). The first SSID is already
+ * included in the probe template, so we need to set only req->n_ssids - 1 bits
+ * in addition to the first bit.
  */
 static u16 iwl_mvm_get_active_dwell(enum ieee80211_band band, int n_ssids)
 {
@@ -176,19 +178,12 @@
 	struct iwl_scan_channel *chan = (struct iwl_scan_channel *)
 		(cmd->data + le16_to_cpu(cmd->tx_cmd.len));
 	int i;
-	__le32 chan_type_value;
-
-	if (req->n_ssids > 0)
-		chan_type_value = cpu_to_le32(BIT(req->n_ssids + 1) - 1);
-	else
-		chan_type_value = SCAN_CHANNEL_TYPE_PASSIVE;
 
 	for (i = 0; i < cmd->channel_count; i++) {
 		chan->channel = cpu_to_le16(req->channels[i]->hw_value);
+		chan->type = cpu_to_le32(BIT(req->n_ssids) - 1);
 		if (req->channels[i]->flags & IEEE80211_CHAN_PASSIVE_SCAN)
-			chan->type = SCAN_CHANNEL_TYPE_PASSIVE;
-		else
-			chan->type = chan_type_value;
+			chan->type &= cpu_to_le32(~SCAN_CHANNEL_TYPE_ACTIVE);
 		chan->active_dwell = cpu_to_le16(active_dwell);
 		chan->passive_dwell = cpu_to_le16(passive_dwell);
 		chan->iteration_count = cpu_to_le16(1);
diff --git a/drivers/net/wireless/iwlwifi/mvm/sta.c b/drivers/net/wireless/iwlwifi/mvm/sta.c
index 62fe520..563f559 100644
--- a/drivers/net/wireless/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/iwlwifi/mvm/sta.c
@@ -608,6 +608,8 @@
 	return ret;
 }
 
+#define IWL_MAX_RX_BA_SESSIONS 16
+
 int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 		       int tid, u16 ssn, bool start)
 {
@@ -618,11 +620,20 @@
 
 	lockdep_assert_held(&mvm->mutex);
 
+	if (start && mvm->rx_ba_sessions >= IWL_MAX_RX_BA_SESSIONS) {
+		IWL_WARN(mvm, "Not enough RX BA SESSIONS\n");
+		return -ENOSPC;
+	}
+
 	cmd.mac_id_n_color = cpu_to_le32(mvm_sta->mac_id_n_color);
 	cmd.sta_id = mvm_sta->sta_id;
 	cmd.add_modify = STA_MODE_MODIFY;
-	cmd.add_immediate_ba_tid = (u8) tid;
-	cmd.add_immediate_ba_ssn = cpu_to_le16(ssn);
+	if (start) {
+		cmd.add_immediate_ba_tid = (u8) tid;
+		cmd.add_immediate_ba_ssn = cpu_to_le16(ssn);
+	} else {
+		cmd.remove_immediate_ba_tid = (u8) tid;
+	}
 	cmd.modify_mask = start ? STA_MODIFY_ADD_BA_TID :
 				  STA_MODIFY_REMOVE_BA_TID;
 
@@ -648,6 +659,14 @@
 		break;
 	}
 
+	if (!ret) {
+		if (start)
+			mvm->rx_ba_sessions++;
+		else if (mvm->rx_ba_sessions > 0)
+			/* check that restart flow didn't zero the counter */
+			mvm->rx_ba_sessions--;
+	}
+
 	return ret;
 }
 
@@ -896,6 +915,7 @@
 	struct iwl_mvm_sta *mvmsta = (void *)sta->drv_priv;
 	struct iwl_mvm_tid_data *tid_data = &mvmsta->tid_data[tid];
 	u16 txq_id;
+	enum iwl_mvm_agg_state old_state;
 
 	/*
 	 * First set the agg state to OFF to avoid calling
@@ -905,13 +925,17 @@
 	txq_id = tid_data->txq_id;
 	IWL_DEBUG_TX_QUEUES(mvm, "Flush AGG: sta %d tid %d q %d state %d\n",
 			    mvmsta->sta_id, tid, txq_id, tid_data->state);
+	old_state = tid_data->state;
 	tid_data->state = IWL_AGG_OFF;
 	spin_unlock_bh(&mvmsta->lock);
 
-	if (iwl_mvm_flush_tx_path(mvm, BIT(txq_id), true))
-		IWL_ERR(mvm, "Couldn't flush the AGG queue\n");
+	if (old_state >= IWL_AGG_ON) {
+		if (iwl_mvm_flush_tx_path(mvm, BIT(txq_id), true))
+			IWL_ERR(mvm, "Couldn't flush the AGG queue\n");
 
-	iwl_trans_txq_disable(mvm->trans, tid_data->txq_id);
+		iwl_trans_txq_disable(mvm->trans, tid_data->txq_id);
+	}
+
 	mvm->queue_to_mac80211[tid_data->txq_id] =
 				IWL_INVALID_MAC80211_QUEUE;
 
diff --git a/drivers/net/wireless/iwlwifi/mvm/time-event.c b/drivers/net/wireless/iwlwifi/mvm/time-event.c
index ad9bbca..7fd6fbf 100644
--- a/drivers/net/wireless/iwlwifi/mvm/time-event.c
+++ b/drivers/net/wireless/iwlwifi/mvm/time-event.c
@@ -138,6 +138,20 @@
 	schedule_work(&mvm->roc_done_wk);
 }
 
+static bool iwl_mvm_te_check_disconnect(struct iwl_mvm *mvm,
+					struct ieee80211_vif *vif,
+					const char *errmsg)
+{
+	if (vif->type != NL80211_IFTYPE_STATION)
+		return false;
+	if (vif->bss_conf.assoc && vif->bss_conf.dtim_period)
+		return false;
+	if (errmsg)
+		IWL_ERR(mvm, "%s\n", errmsg);
+	ieee80211_connection_loss(vif);
+	return true;
+}
+
 /*
  * Handles a FW notification for an event that is known to the driver.
  *
@@ -163,8 +177,13 @@
 	 * P2P Device discoveribility, while there are other higher priority
 	 * events in the system).
 	 */
-	WARN_ONCE(!le32_to_cpu(notif->status),
-		  "Failed to schedule time event\n");
+	if (WARN_ONCE(!le32_to_cpu(notif->status),
+		      "Failed to schedule time event\n")) {
+		if (iwl_mvm_te_check_disconnect(mvm, te_data->vif, NULL)) {
+			iwl_mvm_te_clear_data(mvm, te_data);
+			return;
+		}
+	}
 
 	if (le32_to_cpu(notif->action) & TE_NOTIF_HOST_EVENT_END) {
 		IWL_DEBUG_TE(mvm,
@@ -180,14 +199,8 @@
 		 * By now, we should have finished association
 		 * and know the dtim period.
 		 */
-		if (te_data->vif->type == NL80211_IFTYPE_STATION &&
-		    (!te_data->vif->bss_conf.assoc ||
-		     !te_data->vif->bss_conf.dtim_period)) {
-			IWL_ERR(mvm,
-				"No assocation and the time event is over already...\n");
-			ieee80211_connection_loss(te_data->vif);
-		}
-
+		iwl_mvm_te_check_disconnect(mvm, te_data->vif,
+			"No assocation and the time event is over already...");
 		iwl_mvm_te_clear_data(mvm, te_data);
 	} else if (le32_to_cpu(notif->action) & TE_NOTIF_HOST_EVENT_START) {
 		te_data->running = true;
diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c
index 81f3ea5..ff13458 100644
--- a/drivers/net/wireless/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/iwlwifi/pcie/drv.c
@@ -130,6 +130,7 @@
 	{IWL_PCI_DEVICE(0x423C, 0x1306, iwl5150_abg_cfg)}, /* Half Mini Card */
 	{IWL_PCI_DEVICE(0x423C, 0x1221, iwl5150_agn_cfg)}, /* Mini Card */
 	{IWL_PCI_DEVICE(0x423C, 0x1321, iwl5150_agn_cfg)}, /* Half Mini Card */
+	{IWL_PCI_DEVICE(0x423C, 0x1326, iwl5150_abg_cfg)}, /* Half Mini Card */
 
 	{IWL_PCI_DEVICE(0x423D, 0x1211, iwl5150_agn_cfg)}, /* Mini Card */
 	{IWL_PCI_DEVICE(0x423D, 0x1311, iwl5150_agn_cfg)}, /* Half Mini Card */
diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index 826c156..390e2f0 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c
@@ -670,6 +670,11 @@
 		return err;
 	}
 
+	/* Reset the entire device */
+	iwl_set_bit(trans, CSR_RESET, CSR_RESET_REG_FLAG_SW_RESET);
+
+	usleep_range(10, 15);
+
 	iwl_pcie_apm_init(trans);
 
 	/* From now on, the op_mode will be kept updated about RF kill state */
@@ -1497,16 +1502,16 @@
 	spin_lock_init(&trans_pcie->reg_lock);
 	init_waitqueue_head(&trans_pcie->ucode_write_waitq);
 
-	/* W/A - seems to solve weird behavior. We need to remove this if we
-	 * don't want to stay in L1 all the time. This wastes a lot of power */
-	pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
-			       PCIE_LINK_STATE_CLKPM);
-
 	if (pci_enable_device(pdev)) {
 		err = -ENODEV;
 		goto out_no_pci;
 	}
 
+	/* W/A - seems to solve weird behavior. We need to remove this if we
+	 * don't want to stay in L1 all the time. This wastes a lot of power */
+	pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S | PCIE_LINK_STATE_L1 |
+			       PCIE_LINK_STATE_CLKPM);
+
 	pci_set_master(pdev);
 
 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(36));
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index ef5fa89..89459db 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -1716,9 +1716,9 @@
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
 	int ret;
 
-	if (priv->bss_mode != NL80211_IFTYPE_STATION) {
+	if (GET_BSS_ROLE(priv) != MWIFIEX_BSS_ROLE_STA) {
 		wiphy_err(wiphy,
-			  "%s: reject infra assoc request in non-STA mode\n",
+			  "%s: reject infra assoc request in non-STA role\n",
 			  dev->name);
 		return -EINVAL;
 	}
diff --git a/drivers/net/wireless/mwifiex/cfp.c b/drivers/net/wireless/mwifiex/cfp.c
index 988552d..5178c46 100644
--- a/drivers/net/wireless/mwifiex/cfp.c
+++ b/drivers/net/wireless/mwifiex/cfp.c
@@ -415,7 +415,8 @@
 	u32 k = 0;
 	struct mwifiex_adapter *adapter = priv->adapter;
 
-	if (priv->bss_mode == NL80211_IFTYPE_STATION) {
+	if (priv->bss_mode == NL80211_IFTYPE_STATION ||
+	    priv->bss_mode == NL80211_IFTYPE_P2P_CLIENT) {
 		switch (adapter->config_bands) {
 		case BAND_B:
 			dev_dbg(adapter->dev, "info: infra band=%d "
diff --git a/drivers/net/wireless/mwifiex/init.c b/drivers/net/wireless/mwifiex/init.c
index caaf4bd..2cf8b96 100644
--- a/drivers/net/wireless/mwifiex/init.c
+++ b/drivers/net/wireless/mwifiex/init.c
@@ -693,7 +693,7 @@
 		if (!ret) {
 			dev_notice(adapter->dev,
 				   "WLAN FW already running! Skip FW dnld\n");
-			goto done;
+			return 0;
 		}
 
 		poll_num = MAX_FIRMWARE_POLL_TRIES;
@@ -719,14 +719,8 @@
 poll_fw:
 	/* Check if the firmware is downloaded successfully or not */
 	ret = adapter->if_ops.check_fw_status(adapter, poll_num);
-	if (ret) {
+	if (ret)
 		dev_err(adapter->dev, "FW failed to be active in time\n");
-		return -1;
-	}
-done:
-	/* re-enable host interrupt for mwifiex after fw dnld is successful */
-	if (adapter->if_ops.enable_int)
-		adapter->if_ops.enable_int(adapter);
 
 	return ret;
 }
diff --git a/drivers/net/wireless/mwifiex/join.c b/drivers/net/wireless/mwifiex/join.c
index 1c8a771..12e7781 100644
--- a/drivers/net/wireless/mwifiex/join.c
+++ b/drivers/net/wireless/mwifiex/join.c
@@ -1291,8 +1291,10 @@
 {
 	u8 current_bssid[ETH_ALEN];
 
-	/* Return error if the adapter or table entry is not marked as infra */
-	if ((priv->bss_mode != NL80211_IFTYPE_STATION) ||
+	/* Return error if the adapter is not STA role or table entry
+	 * is not marked as infra.
+	 */
+	if ((GET_BSS_ROLE(priv) != MWIFIEX_BSS_ROLE_STA) ||
 	    (bss_desc->bss_mode != NL80211_IFTYPE_STATION))
 		return -1;
 
diff --git a/drivers/net/wireless/mwifiex/main.c b/drivers/net/wireless/mwifiex/main.c
index e15ab72..1753431 100644
--- a/drivers/net/wireless/mwifiex/main.c
+++ b/drivers/net/wireless/mwifiex/main.c
@@ -427,6 +427,10 @@
 				"Cal data request_firmware() failed\n");
 	}
 
+	/* enable host interrupt after fw dnld is successful */
+	if (adapter->if_ops.enable_int)
+		adapter->if_ops.enable_int(adapter);
+
 	adapter->init_wait_q_woken = false;
 	ret = mwifiex_init_fw(adapter);
 	if (ret == -1) {
@@ -478,6 +482,8 @@
 	mwifiex_del_virtual_intf(adapter->wiphy, priv->wdev);
 	rtnl_unlock();
 err_init_fw:
+	if (adapter->if_ops.disable_int)
+		adapter->if_ops.disable_int(adapter);
 	pr_debug("info: %s: unregister device\n", __func__);
 	adapter->if_ops.unregister_dev(adapter);
 done:
@@ -855,7 +861,7 @@
 	INIT_WORK(&adapter->main_work, mwifiex_main_work_queue);
 
 	/* Register the device. Fill up the private data structure with relevant
-	   information from the card and request for the required IRQ. */
+	   information from the card. */
 	if (adapter->if_ops.register_dev(adapter)) {
 		pr_err("%s: failed to register mwifiex device\n", __func__);
 		goto err_registerdev;
@@ -919,6 +925,11 @@
 	if (!adapter)
 		goto exit_remove;
 
+	/* We can no longer handle interrupts once we start doing the teardown
+	 * below. */
+	if (adapter->if_ops.disable_int)
+		adapter->if_ops.disable_int(adapter);
+
 	adapter->surprise_removed = true;
 
 	/* Stop data */
diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h
index 3da73d3..253e0bd 100644
--- a/drivers/net/wireless/mwifiex/main.h
+++ b/drivers/net/wireless/mwifiex/main.h
@@ -601,6 +601,7 @@
 	int (*register_dev) (struct mwifiex_adapter *);
 	void (*unregister_dev) (struct mwifiex_adapter *);
 	int (*enable_int) (struct mwifiex_adapter *);
+	void (*disable_int) (struct mwifiex_adapter *);
 	int (*process_int_status) (struct mwifiex_adapter *);
 	int (*host_to_card) (struct mwifiex_adapter *, u8, struct sk_buff *,
 			     struct mwifiex_tx_param *);
diff --git a/drivers/net/wireless/mwifiex/sdio.c b/drivers/net/wireless/mwifiex/sdio.c
index 5ee5ed0..09185c9 100644
--- a/drivers/net/wireless/mwifiex/sdio.c
+++ b/drivers/net/wireless/mwifiex/sdio.c
@@ -51,6 +51,7 @@
 static struct semaphore add_remove_card_sem;
 
 static int mwifiex_sdio_resume(struct device *dev);
+static void mwifiex_sdio_interrupt(struct sdio_func *func);
 
 /*
  * SDIO probe.
@@ -296,6 +297,15 @@
 	}
 };
 
+/* Write data into SDIO card register. Caller claims SDIO device. */
+static int
+mwifiex_write_reg_locked(struct sdio_func *func, u32 reg, u8 data)
+{
+	int ret = -1;
+	sdio_writeb(func, data, reg, &ret);
+	return ret;
+}
+
 /*
  * This function writes data into SDIO card register.
  */
@@ -303,10 +313,10 @@
 mwifiex_write_reg(struct mwifiex_adapter *adapter, u32 reg, u8 data)
 {
 	struct sdio_mmc_card *card = adapter->card;
-	int ret = -1;
+	int ret;
 
 	sdio_claim_host(card->func);
-	sdio_writeb(card->func, data, reg, &ret);
+	ret = mwifiex_write_reg_locked(card->func, reg, data);
 	sdio_release_host(card->func);
 
 	return ret;
@@ -685,23 +695,15 @@
  * The host interrupt mask is read, the disable bit is reset and
  * written back to the card host interrupt mask register.
  */
-static int mwifiex_sdio_disable_host_int(struct mwifiex_adapter *adapter)
+static void mwifiex_sdio_disable_host_int(struct mwifiex_adapter *adapter)
 {
-	u8 host_int_mask, host_int_disable = HOST_INT_DISABLE;
+	struct sdio_mmc_card *card = adapter->card;
+	struct sdio_func *func = card->func;
 
-	/* Read back the host_int_mask register */
-	if (mwifiex_read_reg(adapter, HOST_INT_MASK_REG, &host_int_mask))
-		return -1;
-
-	/* Update with the mask and write back to the register */
-	host_int_mask &= ~host_int_disable;
-
-	if (mwifiex_write_reg(adapter, HOST_INT_MASK_REG, host_int_mask)) {
-		dev_err(adapter->dev, "disable host interrupt failed\n");
-		return -1;
-	}
-
-	return 0;
+	sdio_claim_host(func);
+	mwifiex_write_reg_locked(func, HOST_INT_MASK_REG, 0);
+	sdio_release_irq(func);
+	sdio_release_host(func);
 }
 
 /*
@@ -713,14 +715,29 @@
 static int mwifiex_sdio_enable_host_int(struct mwifiex_adapter *adapter)
 {
 	struct sdio_mmc_card *card = adapter->card;
+	struct sdio_func *func = card->func;
+	int ret;
+
+	sdio_claim_host(func);
+
+	/* Request the SDIO IRQ */
+	ret = sdio_claim_irq(func, mwifiex_sdio_interrupt);
+	if (ret) {
+		dev_err(adapter->dev, "claim irq failed: ret=%d\n", ret);
+		goto out;
+	}
 
 	/* Simply write the mask to the register */
-	if (mwifiex_write_reg(adapter, HOST_INT_MASK_REG,
-			      card->reg->host_int_enable)) {
+	ret = mwifiex_write_reg_locked(func, HOST_INT_MASK_REG,
+				       card->reg->host_int_enable);
+	if (ret) {
 		dev_err(adapter->dev, "enable host interrupt failed\n");
-		return -1;
+		sdio_release_irq(func);
 	}
-	return 0;
+
+out:
+	sdio_release_host(func);
+	return ret;
 }
 
 /*
@@ -997,9 +1014,6 @@
 	}
 	adapter = card->adapter;
 
-	if (adapter->surprise_removed)
-		return;
-
 	if (!adapter->pps_uapsd_mode && adapter->ps_state == PS_STATE_SLEEP)
 		adapter->ps_state = PS_STATE_AWAKE;
 
@@ -1625,8 +1639,8 @@
 	/* Allocate buffer and copy payload */
 	blk_size = MWIFIEX_SDIO_BLOCK_SIZE;
 	buf_block_len = (pkt_len + blk_size - 1) / blk_size;
-	*(u16 *) &payload[0] = (u16) pkt_len;
-	*(u16 *) &payload[2] = type;
+	*(__le16 *)&payload[0] = cpu_to_le16((u16)pkt_len);
+	*(__le16 *)&payload[2] = cpu_to_le16(type);
 
 	/*
 	 * This is SDIO specific header
@@ -1728,9 +1742,7 @@
 	struct sdio_mmc_card *card = adapter->card;
 
 	if (adapter->card) {
-		/* Release the SDIO IRQ */
 		sdio_claim_host(card->func);
-		sdio_release_irq(card->func);
 		sdio_disable_func(card->func);
 		sdio_release_host(card->func);
 		sdio_set_drvdata(card->func, NULL);
@@ -1744,7 +1756,7 @@
  */
 static int mwifiex_register_dev(struct mwifiex_adapter *adapter)
 {
-	int ret = 0;
+	int ret;
 	struct sdio_mmc_card *card = adapter->card;
 	struct sdio_func *func = card->func;
 
@@ -1753,22 +1765,14 @@
 
 	sdio_claim_host(func);
 
-	/* Request the SDIO IRQ */
-	ret = sdio_claim_irq(func, mwifiex_sdio_interrupt);
-	if (ret) {
-		pr_err("claim irq failed: ret=%d\n", ret);
-		goto disable_func;
-	}
-
 	/* Set block size */
 	ret = sdio_set_block_size(card->func, MWIFIEX_SDIO_BLOCK_SIZE);
+	sdio_release_host(func);
 	if (ret) {
 		pr_err("cannot set SDIO block size\n");
-		ret = -1;
-		goto release_irq;
+		return ret;
 	}
 
-	sdio_release_host(func);
 	sdio_set_drvdata(func, card);
 
 	adapter->dev = &func->dev;
@@ -1776,15 +1780,6 @@
 	strcpy(adapter->fw_name, card->firmware);
 
 	return 0;
-
-release_irq:
-	sdio_release_irq(func);
-disable_func:
-	sdio_disable_func(func);
-	sdio_release_host(func);
-	adapter->card = NULL;
-
-	return -1;
 }
 
 /*
@@ -1813,9 +1808,6 @@
 	 */
 	mwifiex_read_reg(adapter, HOST_INTSTATUS_REG, &sdio_ireg);
 
-	/* Disable host interrupt mask register for SDIO */
-	mwifiex_sdio_disable_host_int(adapter);
-
 	/* Get SDIO ioport */
 	mwifiex_init_sdio_ioport(adapter);
 
@@ -1957,6 +1949,7 @@
 	.register_dev = mwifiex_register_dev,
 	.unregister_dev = mwifiex_unregister_dev,
 	.enable_int = mwifiex_sdio_enable_host_int,
+	.disable_int = mwifiex_sdio_disable_host_int,
 	.process_int_status = mwifiex_process_int_status,
 	.host_to_card = mwifiex_sdio_host_to_card,
 	.wakeup = mwifiex_pm_wakeup_card,
diff --git a/drivers/net/wireless/mwifiex/sdio.h b/drivers/net/wireless/mwifiex/sdio.h
index 6d51dfd..532ae0a 100644
--- a/drivers/net/wireless/mwifiex/sdio.h
+++ b/drivers/net/wireless/mwifiex/sdio.h
@@ -92,9 +92,6 @@
 /* Host Control Registers : Download host interrupt mask */
 #define DN_LD_HOST_INT_MASK		(0x2U)
 
-/* Disable Host interrupt mask */
-#define	HOST_INT_DISABLE		0xff
-
 /* Host Control Registers : Host interrupt status */
 #define HOST_INTSTATUS_REG		0x03
 /* Host Control Registers : Upload host interrupt status */
diff --git a/drivers/net/wireless/mwifiex/sta_ioctl.c b/drivers/net/wireless/mwifiex/sta_ioctl.c
index 206c3e0..8af97ab 100644
--- a/drivers/net/wireless/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/mwifiex/sta_ioctl.c
@@ -257,10 +257,10 @@
 			goto done;
 	}
 
-	if (priv->bss_mode == NL80211_IFTYPE_STATION) {
+	if (priv->bss_mode == NL80211_IFTYPE_STATION ||
+	    priv->bss_mode == NL80211_IFTYPE_P2P_CLIENT) {
 		u8 config_bands;
 
-		/* Infra mode */
 		ret = mwifiex_deauthenticate(priv, NULL);
 		if (ret)
 			goto done;
diff --git a/drivers/net/wireless/rt2x00/Kconfig b/drivers/net/wireless/rt2x00/Kconfig
index 9b915d3..3e60a31 100644
--- a/drivers/net/wireless/rt2x00/Kconfig
+++ b/drivers/net/wireless/rt2x00/Kconfig
@@ -1,6 +1,6 @@
 menuconfig RT2X00
 	tristate "Ralink driver support"
-	depends on MAC80211
+	depends on MAC80211 && HAS_DMA
 	---help---
 	  This will enable the support for the Ralink drivers,
 	  developed in the rt2x00 project <http://rt2x00.serialmonkey.com>.
diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c
index 6c0a91f..aa95c6c 100644
--- a/drivers/net/wireless/rt2x00/rt2x00queue.c
+++ b/drivers/net/wireless/rt2x00/rt2x00queue.c
@@ -936,13 +936,8 @@
 	spin_unlock_irqrestore(&queue->index_lock, irqflags);
 }
 
-void rt2x00queue_pause_queue(struct data_queue *queue)
+void rt2x00queue_pause_queue_nocheck(struct data_queue *queue)
 {
-	if (!test_bit(DEVICE_STATE_PRESENT, &queue->rt2x00dev->flags) ||
-	    !test_bit(QUEUE_STARTED, &queue->flags) ||
-	    test_and_set_bit(QUEUE_PAUSED, &queue->flags))
-		return;
-
 	switch (queue->qid) {
 	case QID_AC_VO:
 	case QID_AC_VI:
@@ -958,6 +953,15 @@
 		break;
 	}
 }
+void rt2x00queue_pause_queue(struct data_queue *queue)
+{
+	if (!test_bit(DEVICE_STATE_PRESENT, &queue->rt2x00dev->flags) ||
+	    !test_bit(QUEUE_STARTED, &queue->flags) ||
+	    test_and_set_bit(QUEUE_PAUSED, &queue->flags))
+		return;
+
+	rt2x00queue_pause_queue_nocheck(queue);
+}
 EXPORT_SYMBOL_GPL(rt2x00queue_pause_queue);
 
 void rt2x00queue_unpause_queue(struct data_queue *queue)
@@ -1019,7 +1023,7 @@
 		return;
 	}
 
-	rt2x00queue_pause_queue(queue);
+	rt2x00queue_pause_queue_nocheck(queue);
 
 	queue->rt2x00dev->ops->lib->stop_queue(queue);
 
diff --git a/drivers/net/wireless/rtlwifi/Kconfig b/drivers/net/wireless/rtlwifi/Kconfig
index 7253de3..c2ffce7 100644
--- a/drivers/net/wireless/rtlwifi/Kconfig
+++ b/drivers/net/wireless/rtlwifi/Kconfig
@@ -1,13 +1,91 @@
-config RTLWIFI
-	tristate "Realtek wireless card support"
-	depends on MAC80211
-	select FW_LOADER
+menuconfig RTL_CARDS
+	tristate "Realtek rtlwifi family of devices"
+	depends on MAC80211 && (PCI || USB)
+	default y
 	---help---
-	  This is common code for RTL8192CE/RTL8192CU/RTL8192SE/RTL8723AE
-	  drivers.  This module does nothing by itself - the various front-end
-	  drivers need to be enabled to support any desired devices.
+	  This option will enable support for the Realtek mac80211-based
+	  wireless drivers. Drivers rtl8192ce, rtl8192cu, rtl8192se, rtl8192de,
+	  rtl8723eu, and rtl8188eu share some common code.
 
-	  If you choose to build as a module, it'll be called rtlwifi.
+if RTL_CARDS
+
+config RTL8192CE
+	tristate "Realtek RTL8192CE/RTL8188CE Wireless Network Adapter"
+	depends on PCI
+	select RTL8192C_COMMON
+	select RTLWIFI
+	select RTLWIFI_PCI
+	---help---
+	This is the driver for Realtek RTL8192CE/RTL8188CE 802.11n PCIe
+	wireless network adapters.
+
+	If you choose to build it as a module, it will be called rtl8192ce
+
+config RTL8192SE
+	tristate "Realtek RTL8192SE/RTL8191SE PCIe Wireless Network Adapter"
+	depends on PCI
+	select RTLWIFI
+	select RTLWIFI_PCI
+	---help---
+	This is the driver for Realtek RTL8192SE/RTL8191SE 802.11n PCIe
+	wireless network adapters.
+
+	If you choose to build it as a module, it will be called rtl8192se
+
+config RTL8192DE
+	tristate "Realtek RTL8192DE/RTL8188DE PCIe Wireless Network Adapter"
+	depends on PCI
+	select RTLWIFI
+	select RTLWIFI_PCI
+	---help---
+	This is the driver for Realtek RTL8192DE/RTL8188DE 802.11n PCIe
+	wireless network adapters.
+
+	If you choose to build it as a module, it will be called rtl8192de
+
+config RTL8723AE
+	tristate "Realtek RTL8723AE PCIe Wireless Network Adapter"
+	depends on PCI
+	select RTLWIFI
+	select RTLWIFI_PCI
+	---help---
+	This is the driver for Realtek RTL8723AE 802.11n PCIe
+	wireless network adapters.
+
+	If you choose to build it as a module, it will be called rtl8723ae
+
+config RTL8188EE
+	tristate "Realtek RTL8188EE Wireless Network Adapter"
+	depends on PCI
+	select RTLWIFI
+	select RTLWIFI_PCI
+	---help---
+	This is the driver for Realtek RTL8188EE 802.11n PCIe
+	wireless network adapters.
+
+	If you choose to build it as a module, it will be called rtl8188ee
+
+config RTL8192CU
+	tristate "Realtek RTL8192CU/RTL8188CU USB Wireless Network Adapter"
+	depends on USB
+	select RTLWIFI
+	select RTLWIFI_USB
+	select RTL8192C_COMMON
+	---help---
+	This is the driver for Realtek RTL8192CU/RTL8188CU 802.11n USB
+	wireless network adapters.
+
+	If you choose to build it as a module, it will be called rtl8192cu
+
+config RTLWIFI
+	tristate
+	select FW_LOADER
+
+config RTLWIFI_PCI
+	tristate
+
+config RTLWIFI_USB
+	tristate
 
 config RTLWIFI_DEBUG
 	bool "Debugging output for rtlwifi driver family"
@@ -18,63 +96,9 @@
 	the front-end driver, this parameter must be "Y". For memory-limited
 	systems, choose "N". If in doubt, choose "Y".
 
-config RTL8192CE
-	tristate "Realtek RTL8192CE/RTL8188CE Wireless Network Adapter"
-	depends on RTLWIFI && PCI
-	select RTL8192C_COMMON
-	---help---
-	This is the driver for Realtek RTL8192CE/RTL8188CE 802.11n PCIe
-	wireless network adapters.
-
-	If you choose to build it as a module, it will be called rtl8192ce
-
-config RTL8192SE
-	tristate "Realtek RTL8192SE/RTL8191SE PCIe Wireless Network Adapter"
-	depends on RTLWIFI && PCI
-	---help---
-	This is the driver for Realtek RTL8192SE/RTL8191SE 802.11n PCIe
-	wireless network adapters.
-
-	If you choose to build it as a module, it will be called rtl8192se
-
-config RTL8192DE
-	tristate "Realtek RTL8192DE/RTL8188DE PCIe Wireless Network Adapter"
-	depends on RTLWIFI && PCI
-	---help---
-	This is the driver for Realtek RTL8192DE/RTL8188DE 802.11n PCIe
-	wireless network adapters.
-
-	If you choose to build it as a module, it will be called rtl8192de
-
-config RTL8723AE
-	tristate "Realtek RTL8723AE PCIe Wireless Network Adapter"
-	depends on RTLWIFI && PCI
-	---help---
-	This is the driver for Realtek RTL8723AE 802.11n PCIe
-	wireless network adapters.
-
-	If you choose to build it as a module, it will be called rtl8723ae
-
-config RTL8188EE
-	tristate "Realtek RTL8188EE Wireless Network Adapter"
-	depends on RTLWIFI && PCI
-	---help---
-	This is the driver for Realtek RTL8188EE 802.11n PCIe
-	wireless network adapters.
-
-	If you choose to build it as a module, it will be called rtl8188ee
-
-config RTL8192CU
-	tristate "Realtek RTL8192CU/RTL8188CU USB Wireless Network Adapter"
-	depends on RTLWIFI && USB
-	select RTL8192C_COMMON
-	---help---
-	This is the driver for Realtek RTL8192CU/RTL8188CU 802.11n USB
-	wireless network adapters.
-
-	If you choose to build it as a module, it will be called rtl8192cu
-
 config RTL8192C_COMMON
 	tristate
 	depends on RTL8192CE || RTL8192CU
-	default m
+	default y
+
+endif
diff --git a/drivers/net/wireless/rtlwifi/Makefile b/drivers/net/wireless/rtlwifi/Makefile
index ff02b87..d56f023 100644
--- a/drivers/net/wireless/rtlwifi/Makefile
+++ b/drivers/net/wireless/rtlwifi/Makefile
@@ -12,13 +12,11 @@
 
 rtl8192c_common-objs +=		\
 
-ifneq ($(CONFIG_PCI),)
-rtlwifi-objs	+= pci.o
-endif
+obj-$(CONFIG_RTLWIFI_PCI)	+= rtl_pci.o
+rtl_pci-objs	:=		pci.o
 
-ifneq ($(CONFIG_USB),)
-rtlwifi-objs	+= usb.o
-endif
+obj-$(CONFIG_RTLWIFI_USB)	+= rtl_usb.o
+rtl_usb-objs	:=		usb.o
 
 obj-$(CONFIG_RTL8192C_COMMON)	+= rtl8192c/
 obj-$(CONFIG_RTL8192CE)		+= rtl8192ce/
diff --git a/drivers/net/wireless/rtlwifi/base.c b/drivers/net/wireless/rtlwifi/base.c
index 9d558ac..7651f5a 100644
--- a/drivers/net/wireless/rtlwifi/base.c
+++ b/drivers/net/wireless/rtlwifi/base.c
@@ -172,6 +172,7 @@
 {
 	return tid_to_ac[tid];
 }
+EXPORT_SYMBOL_GPL(rtl_tid_to_ac);
 
 static void _rtl_init_hw_ht_capab(struct ieee80211_hw *hw,
 				  struct ieee80211_sta_ht_cap *ht_cap)
@@ -406,6 +407,7 @@
 	cancel_delayed_work(&rtlpriv->works.ps_rfon_wq);
 	cancel_delayed_work(&rtlpriv->works.fwevt_wq);
 }
+EXPORT_SYMBOL_GPL(rtl_deinit_deferred_work);
 
 void rtl_init_rfkill(struct ieee80211_hw *hw)
 {
@@ -439,6 +441,7 @@
 {
 	wiphy_rfkill_stop_polling(hw->wiphy);
 }
+EXPORT_SYMBOL_GPL(rtl_deinit_rfkill);
 
 int rtl_init_core(struct ieee80211_hw *hw)
 {
@@ -489,10 +492,12 @@
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(rtl_init_core);
 
 void rtl_deinit_core(struct ieee80211_hw *hw)
 {
 }
+EXPORT_SYMBOL_GPL(rtl_deinit_core);
 
 void rtl_init_rx_config(struct ieee80211_hw *hw)
 {
@@ -501,6 +506,7 @@
 
 	rtlpriv->cfg->ops->get_hw_reg(hw, HW_VAR_RCR, (u8 *) (&mac->rx_conf));
 }
+EXPORT_SYMBOL_GPL(rtl_init_rx_config);
 
 /*********************************************************
  *
@@ -879,6 +885,7 @@
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(rtl_tx_mgmt_proc);
 
 void rtl_get_tcb_desc(struct ieee80211_hw *hw,
 		      struct ieee80211_tx_info *info,
@@ -1052,6 +1059,7 @@
 
 	return true;
 }
+EXPORT_SYMBOL_GPL(rtl_action_proc);
 
 /*should call before software enc*/
 u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx)
@@ -1125,6 +1133,7 @@
 
 	return false;
 }
+EXPORT_SYMBOL_GPL(rtl_is_special_data);
 
 /*********************************************************
  *
@@ -1300,6 +1309,7 @@
 
 	rtlpriv->link_info.bcn_rx_inperiod++;
 }
+EXPORT_SYMBOL_GPL(rtl_beacon_statistic);
 
 void rtl_watchdog_wq_callback(void *data)
 {
@@ -1793,6 +1803,7 @@
 
 	mac->vendor = vendor;
 }
+EXPORT_SYMBOL_GPL(rtl_recognize_peer);
 
 /*********************************************************
  *
@@ -1849,6 +1860,7 @@
 	.name = "rtlsysfs",
 	.attrs = rtl_sysfs_entries,
 };
+EXPORT_SYMBOL_GPL(rtl_attribute_group);
 
 MODULE_AUTHOR("lizhaoming	<chaoming_li@realsil.com.cn>");
 MODULE_AUTHOR("Realtek WlanFAE	<wlanfae@realtek.com>");
@@ -1856,7 +1868,8 @@
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Realtek 802.11n PCI wireless core");
 
-struct rtl_global_var global_var = {};
+struct rtl_global_var rtl_global_var = {};
+EXPORT_SYMBOL_GPL(rtl_global_var);
 
 static int __init rtl_core_module_init(void)
 {
@@ -1864,8 +1877,8 @@
 		pr_err("Unable to register rtl_rc, use default RC !!\n");
 
 	/* init some global vars */
-	INIT_LIST_HEAD(&global_var.glb_priv_list);
-	spin_lock_init(&global_var.glb_list_lock);
+	INIT_LIST_HEAD(&rtl_global_var.glb_priv_list);
+	spin_lock_init(&rtl_global_var.glb_list_lock);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/rtlwifi/base.h b/drivers/net/wireless/rtlwifi/base.h
index 8576bc3..0e5fe09 100644
--- a/drivers/net/wireless/rtlwifi/base.h
+++ b/drivers/net/wireless/rtlwifi/base.h
@@ -147,7 +147,7 @@
 u8 rtl_tid_to_ac(u8 tid);
 extern struct attribute_group rtl_attribute_group;
 void rtl_easy_concurrent_retrytimer_callback(unsigned long data);
-extern struct rtl_global_var global_var;
+extern struct rtl_global_var rtl_global_var;
 int rtlwifi_rate_mapping(struct ieee80211_hw *hw,
 			 bool isht, u8 desc_rate, bool first_ampdu);
 bool rtl_tx_mgmt_proc(struct ieee80211_hw *hw, struct sk_buff *skb);
diff --git a/drivers/net/wireless/rtlwifi/core.c b/drivers/net/wireless/rtlwifi/core.c
index ee84844..733b7ce 100644
--- a/drivers/net/wireless/rtlwifi/core.c
+++ b/drivers/net/wireless/rtlwifi/core.c
@@ -1330,3 +1330,4 @@
 	.rfkill_poll = rtl_op_rfkill_poll,
 	.flush = rtl_op_flush,
 };
+EXPORT_SYMBOL_GPL(rtl_ops);
diff --git a/drivers/net/wireless/rtlwifi/debug.c b/drivers/net/wireless/rtlwifi/debug.c
index 7d52d3d..76e2086 100644
--- a/drivers/net/wireless/rtlwifi/debug.c
+++ b/drivers/net/wireless/rtlwifi/debug.c
@@ -51,3 +51,4 @@
 
 	/*Init Debug flag enable condition */
 }
+EXPORT_SYMBOL_GPL(rtl_dbgp_flag_init);
diff --git a/drivers/net/wireless/rtlwifi/efuse.c b/drivers/net/wireless/rtlwifi/efuse.c
index 9e38941..838a1ed 100644
--- a/drivers/net/wireless/rtlwifi/efuse.c
+++ b/drivers/net/wireless/rtlwifi/efuse.c
@@ -229,6 +229,7 @@
 
 	*pbuf = (u8) (value32 & 0xff);
 }
+EXPORT_SYMBOL_GPL(read_efuse_byte);
 
 void read_efuse(struct ieee80211_hw *hw, u16 _offset, u16 _size_byte, u8 *pbuf)
 {
diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c
index c97e9d3..703f839 100644
--- a/drivers/net/wireless/rtlwifi/pci.c
+++ b/drivers/net/wireless/rtlwifi/pci.c
@@ -35,6 +35,13 @@
 #include "efuse.h"
 #include <linux/export.h>
 #include <linux/kmemleak.h>
+#include <linux/module.h>
+
+MODULE_AUTHOR("lizhaoming	<chaoming_li@realsil.com.cn>");
+MODULE_AUTHOR("Realtek WlanFAE	<wlanfae@realtek.com>");
+MODULE_AUTHOR("Larry Finger	<Larry.FInger@lwfinger.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("PCI basic driver for rtlwifi");
 
 static const u16 pcibridge_vendors[PCI_BRIDGE_VENDOR_MAX] = {
 	PCI_VENDOR_ID_INTEL,
@@ -1008,19 +1015,6 @@
 	return;
 }
 
-static void rtl_lps_change_work_callback(struct work_struct *work)
-{
-	struct rtl_works *rtlworks =
-	    container_of(work, struct rtl_works, lps_change_work);
-	struct ieee80211_hw *hw = rtlworks->hw;
-	struct rtl_priv *rtlpriv = rtl_priv(hw);
-
-	if (rtlpriv->enter_ps)
-		rtl_lps_enter(hw);
-	else
-		rtl_lps_leave(hw);
-}
-
 static void _rtl_pci_init_trx_var(struct ieee80211_hw *hw)
 {
 	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
@@ -1899,7 +1893,7 @@
 	rtlpriv->rtlhal.interface = INTF_PCI;
 	rtlpriv->cfg = (struct rtl_hal_cfg *)(id->driver_data);
 	rtlpriv->intf_ops = &rtl_pci_ops;
-	rtlpriv->glb_var = &global_var;
+	rtlpriv->glb_var = &rtl_global_var;
 
 	/*
 	 *init dbgp flags before all
diff --git a/drivers/net/wireless/rtlwifi/ps.c b/drivers/net/wireless/rtlwifi/ps.c
index 884bcea..298b615 100644
--- a/drivers/net/wireless/rtlwifi/ps.c
+++ b/drivers/net/wireless/rtlwifi/ps.c
@@ -269,6 +269,7 @@
 
 	spin_unlock_irqrestore(&rtlpriv->locks.ips_lock, flags);
 }
+EXPORT_SYMBOL_GPL(rtl_ips_nic_on);
 
 /*for FW LPS*/
 
@@ -518,6 +519,7 @@
 			 "u_bufferd: %x, m_buffered: %x\n", u_buffed, m_buffed);
 	}
 }
+EXPORT_SYMBOL_GPL(rtl_swlps_beacon);
 
 void rtl_swlps_rf_awake(struct ieee80211_hw *hw)
 {
@@ -611,6 +613,19 @@
 			MSECS(sleep_intv * mac->vif->bss_conf.beacon_int - 40));
 }
 
+void rtl_lps_change_work_callback(struct work_struct *work)
+{
+	struct rtl_works *rtlworks =
+	    container_of(work, struct rtl_works, lps_change_work);
+	struct ieee80211_hw *hw = rtlworks->hw;
+	struct rtl_priv *rtlpriv = rtl_priv(hw);
+
+	if (rtlpriv->enter_ps)
+		rtl_lps_enter(hw);
+	else
+		rtl_lps_leave(hw);
+}
+EXPORT_SYMBOL_GPL(rtl_lps_change_work_callback);
 
 void rtl_swlps_wq_callback(void *data)
 {
@@ -922,3 +937,4 @@
 	else
 		rtl_p2p_noa_ie(hw, data, len - FCS_LEN);
 }
+EXPORT_SYMBOL_GPL(rtl_p2p_info);
diff --git a/drivers/net/wireless/rtlwifi/ps.h b/drivers/net/wireless/rtlwifi/ps.h
index 4d682b7..88bd76e 100644
--- a/drivers/net/wireless/rtlwifi/ps.h
+++ b/drivers/net/wireless/rtlwifi/ps.h
@@ -49,5 +49,6 @@
 void rtl_swlps_rf_sleep(struct ieee80211_hw *hw);
 void rtl_p2p_ps_cmd(struct ieee80211_hw *hw, u8 p2p_ps_state);
 void rtl_p2p_info(struct ieee80211_hw *hw, void *data, unsigned int len);
+void rtl_lps_change_work_callback(struct work_struct *work);
 
 #endif
diff --git a/drivers/net/wireless/rtlwifi/usb.c b/drivers/net/wireless/rtlwifi/usb.c
index a3532e0..e56778c 100644
--- a/drivers/net/wireless/rtlwifi/usb.c
+++ b/drivers/net/wireless/rtlwifi/usb.c
@@ -32,6 +32,13 @@
 #include "ps.h"
 #include "rtl8192c/fw_common.h"
 #include <linux/export.h>
+#include <linux/module.h>
+
+MODULE_AUTHOR("lizhaoming	<chaoming_li@realsil.com.cn>");
+MODULE_AUTHOR("Realtek WlanFAE	<wlanfae@realtek.com>");
+MODULE_AUTHOR("Larry Finger	<Larry.FInger@lwfinger.net>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("USB basic driver for rtlwifi");
 
 #define	REALTEK_USB_VENQT_READ			0xC0
 #define	REALTEK_USB_VENQT_WRITE			0x40
@@ -1070,6 +1077,8 @@
 	spin_lock_init(&rtlpriv->locks.usb_lock);
 	INIT_WORK(&rtlpriv->works.fill_h2c_cmd,
 		  rtl_fill_h2c_cmd_work_callback);
+	INIT_WORK(&rtlpriv->works.lps_change_work,
+		  rtl_lps_change_work_callback);
 
 	rtlpriv->usb_data_index = 0;
 	init_completion(&rtlpriv->firmware_loading_complete);
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 4941f20..b8ba1f9 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -98,10 +98,12 @@
 		goto exit;
 
 	err = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), 0x4,
-	    USB_DIR_IN | 0x40, 0,0, &ret, sizeof(ret), ZD1201_FW_TIMEOUT);
+	    USB_DIR_IN | 0x40, 0, 0, buf, sizeof(ret), ZD1201_FW_TIMEOUT);
 	if (err < 0)
 		goto exit;
 
+	memcpy(&ret, buf, sizeof(ret));
+
 	if (ret & 0x80) {
 		err = -EIO;
 		goto exit;
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 6bb7cf2..b10ba00 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -392,6 +392,8 @@
 	mem = (unsigned long)
 		dt_alloc(size + 4, __alignof__(struct device_node));
 
+	memset((void *)mem, 0, size);
+
 	((__be32 *)mem)[size / 4] = cpu_to_be32(0xdeadbeef);
 
 	pr_debug("  unflattening %lx...\n", mem);
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index a3c1c5a..1264923 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -345,6 +345,7 @@
 	if (r && irq) {
 		const char *name = NULL;
 
+		memset(r, 0, sizeof(*r));
 		/*
 		 * Get optional "interrupts-names" property to add a name
 		 * to the resource.
@@ -482,8 +483,9 @@
 		}
 
 		/* Get the next pending parent that might have children */
-		desc = list_first_entry(&intc_parent_list, typeof(*desc), list);
-		if (list_empty(&intc_parent_list) || !desc) {
+		desc = list_first_entry_or_null(&intc_parent_list,
+						typeof(*desc), list);
+		if (!desc) {
 			pr_err("of_irq_init: children remain, but no parents\n");
 			break;
 		}
diff --git a/drivers/parisc/iosapic.c b/drivers/parisc/iosapic.c
index e79e006..9ee04b4 100644
--- a/drivers/parisc/iosapic.c
+++ b/drivers/parisc/iosapic.c
@@ -811,18 +811,28 @@
 	return pcidev->irq;
 }
 
-static struct iosapic_info *first_isi = NULL;
+static struct iosapic_info *iosapic_list;
 
 #ifdef CONFIG_64BIT
-int iosapic_serial_irq(int num)
+int iosapic_serial_irq(struct parisc_device *dev)
 {
-	struct iosapic_info *isi = first_isi;
-	struct irt_entry *irte = NULL;  /* only used if PAT PDC */
+	struct iosapic_info *isi;
+	struct irt_entry *irte;
 	struct vector_info *vi;
-	int isi_line;	/* line used by device */
+	int cnt;
+	int intin;
+
+	intin = (dev->mod_info >> 24) & 15;
 
 	/* lookup IRT entry for isi/slot/pin set */
-	irte = &irt_cell[num];
+	for (cnt = 0; cnt < irt_num_entry; cnt++) {
+		irte = &irt_cell[cnt];
+		if (COMPARE_IRTE_ADDR(irte, dev->mod0) &&
+		    irte->dest_iosapic_intin == intin)
+			break;
+	}
+	if (cnt >= irt_num_entry)
+		return 0; /* no irq found, force polling */
 
 	DBG_IRT("iosapic_serial_irq(): irte %p %x %x %x %x %x %x %x %x\n",
 		irte,
@@ -834,11 +844,17 @@
 		irte->src_seg_id,
 		irte->dest_iosapic_intin,
 		(u32) irte->dest_iosapic_addr);
-	isi_line = irte->dest_iosapic_intin;
+
+	/* search for iosapic */
+	for (isi = iosapic_list; isi; isi = isi->isi_next)
+		if (isi->isi_hpa == dev->mod0)
+			break;
+	if (!isi)
+		return 0; /* no iosapic found, force polling */
 
 	/* get vector info for this input line */
-	vi = isi->isi_vector + isi_line;
-	DBG_IRT("iosapic_serial_irq:  line %d vi 0x%p\n", isi_line, vi);
+	vi = isi->isi_vector + intin;
+	DBG_IRT("iosapic_serial_irq:  line %d vi 0x%p\n", iosapic_intin, vi);
 
 	/* If this IRQ line has already been setup, skip it */
 	if (vi->irte)
@@ -941,8 +957,8 @@
 		vip->irqline = (unsigned char) cnt;
 		vip->iosapic = isi;
 	}
-	if (!first_isi)
-		first_isi = isi;
+	isi->isi_next = iosapic_list;
+	iosapic_list = isi;
 	return isi;
 }
 
diff --git a/drivers/pci/host/pci-mvebu.c b/drivers/pci/host/pci-mvebu.c
index 13a633b..7bf3926 100644
--- a/drivers/pci/host/pci-mvebu.c
+++ b/drivers/pci/host/pci-mvebu.c
@@ -86,10 +86,6 @@
 	u16 secondary_status;
 	u16 membase;
 	u16 memlimit;
-	u16 prefmembase;
-	u16 prefmemlimit;
-	u32 prefbaseupper;
-	u32 preflimitupper;
 	u16 iobaseupper;
 	u16 iolimitupper;
 	u8 cappointer;
@@ -419,15 +415,7 @@
 		break;
 
 	case PCI_PREF_MEMORY_BASE:
-		*value = (bridge->prefmemlimit << 16 | bridge->prefmembase);
-		break;
-
-	case PCI_PREF_BASE_UPPER32:
-		*value = bridge->prefbaseupper;
-		break;
-
-	case PCI_PREF_LIMIT_UPPER32:
-		*value = bridge->preflimitupper;
+		*value = 0;
 		break;
 
 	case PCI_IO_BASE_UPPER16:
@@ -501,19 +489,6 @@
 		mvebu_pcie_handle_membase_change(port);
 		break;
 
-	case PCI_PREF_MEMORY_BASE:
-		bridge->prefmembase = value & 0xffff;
-		bridge->prefmemlimit = value >> 16;
-		break;
-
-	case PCI_PREF_BASE_UPPER32:
-		bridge->prefbaseupper = value;
-		break;
-
-	case PCI_PREF_LIMIT_UPPER32:
-		bridge->preflimitupper = value;
-		break;
-
 	case PCI_IO_BASE_UPPER16:
 		bridge->iobaseupper = value & 0xffff;
 		bridge->iolimitupper = value >> 16;
diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index bb7ebb2..d85009d 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -3,16 +3,13 @@
 #
 
 menuconfig HOTPLUG_PCI
-	tristate "Support for PCI Hotplug"
+	bool "Support for PCI Hotplug"
 	depends on PCI && SYSFS
 	---help---
 	  Say Y here if you have a motherboard with a PCI Hotplug controller.
 	  This allows you to add and remove PCI cards while the machine is
 	  powered up and running.
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called pci_hotplug.
-
 	  When in doubt, say N.
 
 if HOTPLUG_PCI
diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c
index aac7a40..0e0d0f7 100644
--- a/drivers/pci/hotplug/pciehp_pci.c
+++ b/drivers/pci/hotplug/pciehp_pci.c
@@ -92,7 +92,14 @@
 	if (ret)
 		presence = 0;
 
-	list_for_each_entry_safe(dev, temp, &parent->devices, bus_list) {
+	/*
+	 * Stopping an SR-IOV PF device removes all the associated VFs,
+	 * which will update the bus->devices list and confuse the
+	 * iterator.  Therefore, iterate in reverse so we remove the VFs
+	 * first, then the PF.  We do the same in pci_stop_bus_device().
+	 */
+	list_for_each_entry_safe_reverse(dev, temp, &parent->devices,
+					 bus_list) {
 		pci_dev_get(dev);
 		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && presence) {
 			pci_read_config_byte(dev, PCI_BRIDGE_CONTROL, &bctl);
diff --git a/drivers/pci/hotplug/rpadlpar_core.c b/drivers/pci/hotplug/rpadlpar_core.c
index b29e20b..bb7af78 100644
--- a/drivers/pci/hotplug/rpadlpar_core.c
+++ b/drivers/pci/hotplug/rpadlpar_core.c
@@ -388,7 +388,6 @@
 	/* Remove the EADS bridge device itself */
 	BUG_ON(!bus->self);
 	pr_debug("PCI: Now removing bridge device %s\n", pci_name(bus->self));
-	eeh_remove_bus_device(bus->self, true);
 	pci_stop_and_remove_bus_device(bus->self);
 
 	return 0;
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index dbdc5f7..01e264f 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -317,13 +317,20 @@
 /* ACPI bus type */
 static int acpi_pci_find_device(struct device *dev, acpi_handle *handle)
 {
-	struct pci_dev * pci_dev;
-	u64	addr;
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	bool is_bridge;
+	u64 addr;
 
-	pci_dev = to_pci_dev(dev);
+	/*
+	 * pci_is_bridge() is not suitable here, because pci_dev->subordinate
+	 * is set only after acpi_pci_find_device() has been called for the
+	 * given device.
+	 */
+	is_bridge = pci_dev->hdr_type == PCI_HEADER_TYPE_BRIDGE
+			|| pci_dev->hdr_type == PCI_HEADER_TYPE_CARDBUS;
 	/* Please ref to ACPI spec for the syntax of _ADR */
 	addr = (PCI_SLOT(pci_dev->devfn) << 16) | PCI_FUNC(pci_dev->devfn);
-	*handle = acpi_get_child(DEVICE_ACPI_HANDLE(dev->parent), addr);
+	*handle = acpi_find_child(ACPI_HANDLE(dev->parent), addr, is_bridge);
 	if (!*handle)
 		return -ENODEV;
 	return 0;
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 569f82f..3b94cfc 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -14,15 +14,12 @@
 # Include service Kconfig here
 #
 config HOTPLUG_PCI_PCIE
-	tristate "PCI Express Hotplug driver"
+	bool "PCI Express Hotplug driver"
 	depends on HOTPLUG_PCI && PCIEPORTBUS
 	help
 	  Say Y here if you have a motherboard that supports PCI Express Native
 	  Hotplug
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called pciehp.
-
 	  When in doubt, say N.
 
 source "drivers/pci/pcie/aer/Kconfig"
diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c
index d254e23..64a7de2 100644
--- a/drivers/pci/setup-bus.c
+++ b/drivers/pci/setup-bus.c
@@ -300,6 +300,47 @@
 	}
 }
 
+static unsigned long pci_fail_res_type_mask(struct list_head *fail_head)
+{
+	struct pci_dev_resource *fail_res;
+	unsigned long mask = 0;
+
+	/* check failed type */
+	list_for_each_entry(fail_res, fail_head, list)
+		mask |= fail_res->flags;
+
+	/*
+	 * one pref failed resource will set IORESOURCE_MEM,
+	 * as we can allocate pref in non-pref range.
+	 * Will release all assigned non-pref sibling resources
+	 * according to that bit.
+	 */
+	return mask & (IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH);
+}
+
+static bool pci_need_to_release(unsigned long mask, struct resource *res)
+{
+	if (res->flags & IORESOURCE_IO)
+		return !!(mask & IORESOURCE_IO);
+
+	/* check pref at first */
+	if (res->flags & IORESOURCE_PREFETCH) {
+		if (mask & IORESOURCE_PREFETCH)
+			return true;
+		/* count pref if its parent is non-pref */
+		else if ((mask & IORESOURCE_MEM) &&
+			 !(res->parent->flags & IORESOURCE_PREFETCH))
+			return true;
+		else
+			return false;
+	}
+
+	if (res->flags & IORESOURCE_MEM)
+		return !!(mask & IORESOURCE_MEM);
+
+	return false;	/* should not get here */
+}
+
 static void __assign_resources_sorted(struct list_head *head,
 				 struct list_head *realloc_head,
 				 struct list_head *fail_head)
@@ -312,11 +353,24 @@
 	 *  if could do that, could get out early.
 	 *  if could not do that, we still try to assign requested at first,
 	 *    then try to reassign add_size for some resources.
+	 *
+	 * Separate three resource type checking if we need to release
+	 * assigned resource after requested + add_size try.
+	 *	1. if there is io port assign fail, will release assigned
+	 *	   io port.
+	 *	2. if there is pref mmio assign fail, release assigned
+	 *	   pref mmio.
+	 *	   if assigned pref mmio's parent is non-pref mmio and there
+	 *	   is non-pref mmio assign fail, will release that assigned
+	 *	   pref mmio.
+	 *	3. if there is non-pref mmio assign fail or pref mmio
+	 *	   assigned fail, will release assigned non-pref mmio.
 	 */
 	LIST_HEAD(save_head);
 	LIST_HEAD(local_fail_head);
 	struct pci_dev_resource *save_res;
-	struct pci_dev_resource *dev_res;
+	struct pci_dev_resource *dev_res, *tmp_res;
+	unsigned long fail_type;
 
 	/* Check if optional add_size is there */
 	if (!realloc_head || list_empty(realloc_head))
@@ -348,6 +402,19 @@
 		return;
 	}
 
+	/* check failed type */
+	fail_type = pci_fail_res_type_mask(&local_fail_head);
+	/* remove not need to be released assigned res from head list etc */
+	list_for_each_entry_safe(dev_res, tmp_res, head, list)
+		if (dev_res->res->parent &&
+		    !pci_need_to_release(fail_type, dev_res->res)) {
+			/* remove it from realloc_head list */
+			remove_from_list(realloc_head, dev_res->res);
+			remove_from_list(&save_head, dev_res->res);
+			list_del(&dev_res->list);
+			kfree(dev_res);
+		}
+
 	free_list(&local_fail_head);
 	/* Release assigned resource */
 	list_for_each_entry(dev_res, head, list)
diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c
index 5b272bf..2a00239 100644
--- a/drivers/pinctrl/core.c
+++ b/drivers/pinctrl/core.c
@@ -1193,6 +1193,7 @@
 	list_for_each_entry(maps_node, &pinctrl_maps, node) {
 		if (maps_node->maps == map) {
 			list_del(&maps_node->node);
+			kfree(maps_node);
 			mutex_unlock(&pinctrl_maps_mutex);
 			return;
 		}
diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c
index 6866548..7323cca 100644
--- a/drivers/pinctrl/pinctrl-single.c
+++ b/drivers/pinctrl/pinctrl-single.c
@@ -1483,6 +1483,7 @@
 	return ret;
 }
 
+#ifdef CONFIG_PM
 static int pinctrl_single_suspend(struct platform_device *pdev,
 					pm_message_t state)
 {
@@ -1505,6 +1506,7 @@
 
 	return pinctrl_force_default(pcs->pctl);
 }
+#endif
 
 static int pcs_probe(struct platform_device *pdev)
 {
diff --git a/drivers/pinctrl/pinctrl-sunxi.c b/drivers/pinctrl/pinctrl-sunxi.c
index c47fd1e..94716c7 100644
--- a/drivers/pinctrl/pinctrl-sunxi.c
+++ b/drivers/pinctrl/pinctrl-sunxi.c
@@ -278,6 +278,7 @@
 {
 	struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
 	struct sunxi_pinctrl_group *g = &pctl->groups[group];
+	unsigned long flags;
 	u32 val, mask;
 	u16 strength;
 	u8 dlevel;
@@ -295,22 +296,35 @@
 		 *   3: 40mA
 		 */
 		dlevel = strength / 10 - 1;
+
+		spin_lock_irqsave(&pctl->lock, flags);
+
 		val = readl(pctl->membase + sunxi_dlevel_reg(g->pin));
 	        mask = DLEVEL_PINS_MASK << sunxi_dlevel_offset(g->pin);
 		writel((val & ~mask) | dlevel << sunxi_dlevel_offset(g->pin),
 			pctl->membase + sunxi_dlevel_reg(g->pin));
+
+		spin_unlock_irqrestore(&pctl->lock, flags);
 		break;
 	case PIN_CONFIG_BIAS_PULL_UP:
+		spin_lock_irqsave(&pctl->lock, flags);
+
 		val = readl(pctl->membase + sunxi_pull_reg(g->pin));
 		mask = PULL_PINS_MASK << sunxi_pull_offset(g->pin);
 		writel((val & ~mask) | 1 << sunxi_pull_offset(g->pin),
 			pctl->membase + sunxi_pull_reg(g->pin));
+
+		spin_unlock_irqrestore(&pctl->lock, flags);
 		break;
 	case PIN_CONFIG_BIAS_PULL_DOWN:
+		spin_lock_irqsave(&pctl->lock, flags);
+
 		val = readl(pctl->membase + sunxi_pull_reg(g->pin));
 		mask = PULL_PINS_MASK << sunxi_pull_offset(g->pin);
 		writel((val & ~mask) | 2 << sunxi_pull_offset(g->pin),
 			pctl->membase + sunxi_pull_reg(g->pin));
+
+		spin_unlock_irqrestore(&pctl->lock, flags);
 		break;
 	default:
 		break;
@@ -360,11 +374,17 @@
 				 u8 config)
 {
 	struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
+	unsigned long flags;
+	u32 val, mask;
 
-	u32 val = readl(pctl->membase + sunxi_mux_reg(pin));
-	u32 mask = MUX_PINS_MASK << sunxi_mux_offset(pin);
+	spin_lock_irqsave(&pctl->lock, flags);
+
+	val = readl(pctl->membase + sunxi_mux_reg(pin));
+	mask = MUX_PINS_MASK << sunxi_mux_offset(pin);
 	writel((val & ~mask) | config << sunxi_mux_offset(pin),
 		pctl->membase + sunxi_mux_reg(pin));
+
+	spin_unlock_irqrestore(&pctl->lock, flags);
 }
 
 static int sunxi_pmx_enable(struct pinctrl_dev *pctldev,
@@ -464,8 +484,21 @@
 	struct sunxi_pinctrl *pctl = dev_get_drvdata(chip->dev);
 	u32 reg = sunxi_data_reg(offset);
 	u8 index = sunxi_data_offset(offset);
+	unsigned long flags;
+	u32 regval;
 
-	writel((value & DATA_PINS_MASK) << index, pctl->membase + reg);
+	spin_lock_irqsave(&pctl->lock, flags);
+
+	regval = readl(pctl->membase + reg);
+
+	if (value)
+		regval |= BIT(index);
+	else
+		regval &= ~(BIT(index));
+
+	writel(regval, pctl->membase + reg);
+
+	spin_unlock_irqrestore(&pctl->lock, flags);
 }
 
 static int sunxi_pinctrl_gpio_of_xlate(struct gpio_chip *gc,
@@ -526,6 +559,8 @@
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
 	u32 reg = sunxi_irq_cfg_reg(d->hwirq);
 	u8 index = sunxi_irq_cfg_offset(d->hwirq);
+	unsigned long flags;
+	u32 regval;
 	u8 mode;
 
 	switch (type) {
@@ -548,7 +583,13 @@
 		return -EINVAL;
 	}
 
-	writel((mode & IRQ_CFG_IRQ_MASK) << index, pctl->membase + reg);
+	spin_lock_irqsave(&pctl->lock, flags);
+
+	regval = readl(pctl->membase + reg);
+	regval &= ~IRQ_CFG_IRQ_MASK;
+	writel(regval | (mode << index), pctl->membase + reg);
+
+	spin_unlock_irqrestore(&pctl->lock, flags);
 
 	return 0;
 }
@@ -560,14 +601,19 @@
 	u8 ctrl_idx = sunxi_irq_ctrl_offset(d->hwirq);
 	u32 status_reg = sunxi_irq_status_reg(d->hwirq);
 	u8 status_idx = sunxi_irq_status_offset(d->hwirq);
+	unsigned long flags;
 	u32 val;
 
+	spin_lock_irqsave(&pctl->lock, flags);
+
 	/* Mask the IRQ */
 	val = readl(pctl->membase + ctrl_reg);
 	writel(val & ~(1 << ctrl_idx), pctl->membase + ctrl_reg);
 
 	/* Clear the IRQ */
 	writel(1 << status_idx, pctl->membase + status_reg);
+
+	spin_unlock_irqrestore(&pctl->lock, flags);
 }
 
 static void sunxi_pinctrl_irq_mask(struct irq_data *d)
@@ -575,11 +621,16 @@
 	struct sunxi_pinctrl *pctl = irq_data_get_irq_chip_data(d);
 	u32 reg = sunxi_irq_ctrl_reg(d->hwirq);
 	u8 idx = sunxi_irq_ctrl_offset(d->hwirq);
+	unsigned long flags;
 	u32 val;
 
+	spin_lock_irqsave(&pctl->lock, flags);
+
 	/* Mask the IRQ */
 	val = readl(pctl->membase + reg);
 	writel(val & ~(1 << idx), pctl->membase + reg);
+
+	spin_unlock_irqrestore(&pctl->lock, flags);
 }
 
 static void sunxi_pinctrl_irq_unmask(struct irq_data *d)
@@ -588,6 +639,7 @@
 	struct sunxi_desc_function *func;
 	u32 reg = sunxi_irq_ctrl_reg(d->hwirq);
 	u8 idx = sunxi_irq_ctrl_offset(d->hwirq);
+	unsigned long flags;
 	u32 val;
 
 	func = sunxi_pinctrl_desc_find_function_by_pin(pctl,
@@ -597,9 +649,13 @@
 	/* Change muxing to INT mode */
 	sunxi_pmx_set(pctl->pctl_dev, pctl->irq_array[d->hwirq], func->muxval);
 
+	spin_lock_irqsave(&pctl->lock, flags);
+
 	/* Unmask the IRQ */
 	val = readl(pctl->membase + reg);
 	writel(val | (1 << idx), pctl->membase + reg);
+
+	spin_unlock_irqrestore(&pctl->lock, flags);
 }
 
 static struct irq_chip sunxi_pinctrl_irq_chip = {
@@ -752,6 +808,8 @@
 		return -ENOMEM;
 	platform_set_drvdata(pdev, pctl);
 
+	spin_lock_init(&pctl->lock);
+
 	pctl->membase = of_iomap(node, 0);
 	if (!pctl->membase)
 		return -ENOMEM;
diff --git a/drivers/pinctrl/pinctrl-sunxi.h b/drivers/pinctrl/pinctrl-sunxi.h
index d68047d..01c494f 100644
--- a/drivers/pinctrl/pinctrl-sunxi.h
+++ b/drivers/pinctrl/pinctrl-sunxi.h
@@ -14,6 +14,7 @@
 #define __PINCTRL_SUNXI_H
 
 #include <linux/kernel.h>
+#include <linux/spinlock.h>
 
 #define PA_BASE	0
 #define PB_BASE	32
@@ -407,6 +408,7 @@
 	unsigned			ngroups;
 	int				irq;
 	int				irq_array[SUNXI_IRQ_NUMBER];
+	spinlock_t			lock;
 	struct pinctrl_dev		*pctl_dev;
 };
 
diff --git a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
index 7956df5..31f7d0e 100644
--- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
+++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c
@@ -3785,6 +3785,7 @@
 
 static struct regulator_consumer_supply sh73a0_vccq_mc0_consumers[] = {
 	REGULATOR_SUPPLY("vqmmc", "sh_mobile_sdhi.0"),
+	REGULATOR_SUPPLY("vqmmc", "ee100000.sdhi"),
 };
 
 static const struct regulator_init_data sh73a0_vccq_mc0_init_data = {
diff --git a/drivers/pinctrl/sirf/pinctrl-atlas6.c b/drivers/pinctrl/sirf/pinctrl-atlas6.c
index 1fa39a4..867c968 100644
--- a/drivers/pinctrl/sirf/pinctrl-atlas6.c
+++ b/drivers/pinctrl/sirf/pinctrl-atlas6.c
@@ -496,7 +496,7 @@
 static const struct sirfsoc_muxmask usp0_muxmask[] = {
 	{
 		.group = 1,
-		.mask = BIT(19) | BIT(20) | BIT(21) | BIT(22),
+		.mask = BIT(19) | BIT(20) | BIT(21) | BIT(22) | BIT(23),
 	},
 };
 
@@ -507,8 +507,21 @@
 	.funcval = 0,
 };
 
-static const unsigned usp0_pins[] = { 51, 52, 53, 54 };
+static const unsigned usp0_pins[] = { 51, 52, 53, 54, 55 };
 
+static const struct sirfsoc_muxmask usp0_uart_nostreamctrl_muxmask[] = {
+	{
+		.group = 1,
+		.mask = BIT(20) | BIT(21),
+	},
+};
+
+static const struct sirfsoc_padmux usp0_uart_nostreamctrl_padmux = {
+	.muxmask_counts = ARRAY_SIZE(usp0_uart_nostreamctrl_muxmask),
+	.muxmask = usp0_uart_nostreamctrl_muxmask,
+};
+
+static const unsigned usp0_uart_nostreamctrl_pins[] = { 52, 53 };
 static const struct sirfsoc_muxmask usp1_muxmask[] = {
 	{
 		.group = 0,
@@ -822,6 +835,8 @@
 	SIRFSOC_PIN_GROUP("uart2grp", uart2_pins),
 	SIRFSOC_PIN_GROUP("uart2_nostreamctrlgrp", uart2_nostreamctrl_pins),
 	SIRFSOC_PIN_GROUP("usp0grp", usp0_pins),
+	SIRFSOC_PIN_GROUP("usp0_uart_nostreamctrl_grp",
+					usp0_uart_nostreamctrl_pins),
 	SIRFSOC_PIN_GROUP("usp1grp", usp1_pins),
 	SIRFSOC_PIN_GROUP("i2c0grp", i2c0_pins),
 	SIRFSOC_PIN_GROUP("i2c1grp", i2c1_pins),
@@ -862,6 +877,8 @@
 static const char * const uart1grp[] = { "uart1grp" };
 static const char * const uart2grp[] = { "uart2grp" };
 static const char * const uart2_nostreamctrlgrp[] = { "uart2_nostreamctrlgrp" };
+static const char * const usp0_uart_nostreamctrl_grp[] = {
+					"usp0_uart_nostreamctrl_grp" };
 static const char * const usp0grp[] = { "usp0grp" };
 static const char * const usp1grp[] = { "usp1grp" };
 static const char * const i2c0grp[] = { "i2c0grp" };
@@ -904,6 +921,9 @@
 	SIRFSOC_PMX_FUNCTION("uart2", uart2grp, uart2_padmux),
 	SIRFSOC_PMX_FUNCTION("uart2_nostreamctrl", uart2_nostreamctrlgrp, uart2_nostreamctrl_padmux),
 	SIRFSOC_PMX_FUNCTION("usp0", usp0grp, usp0_padmux),
+	SIRFSOC_PMX_FUNCTION("usp0_uart_nostreamctrl",
+						usp0_uart_nostreamctrl_grp,
+						usp0_uart_nostreamctrl_padmux),
 	SIRFSOC_PMX_FUNCTION("usp1", usp1grp, usp1_padmux),
 	SIRFSOC_PMX_FUNCTION("i2c0", i2c0grp, i2c0_padmux),
 	SIRFSOC_PMX_FUNCTION("i2c1", i2c1grp, i2c1_padmux),
diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c
index 0f9f859..f911952 100644
--- a/drivers/platform/olpc/olpc-ec.c
+++ b/drivers/platform/olpc/olpc-ec.c
@@ -330,7 +330,7 @@
 	return platform_driver_register(&olpc_ec_plat_driver);
 }
 
-module_init(olpc_ec_init_module);
+arch_initcall(olpc_ec_init_module);
 
 MODULE_AUTHOR("Andres Salomon <dilinger@queued.net>");
 MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 97bb05e..d6970f4 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -53,7 +53,6 @@
 #define HPWMI_ALS_QUERY 0x3
 #define HPWMI_HARDWARE_QUERY 0x4
 #define HPWMI_WIRELESS_QUERY 0x5
-#define HPWMI_BIOS_QUERY 0x9
 #define HPWMI_HOTKEY_QUERY 0xc
 #define HPWMI_WIRELESS2_QUERY 0x1b
 #define HPWMI_POSTCODEERROR_QUERY 0x2a
@@ -293,19 +292,6 @@
 	return (state & 0x4) ? 1 : 0;
 }
 
-static int hp_wmi_enable_hotkeys(void)
-{
-	int ret;
-	int query = 0x6e;
-
-	ret = hp_wmi_perform_query(HPWMI_BIOS_QUERY, 1, &query, sizeof(query),
-				   0);
-
-	if (ret)
-		return -EINVAL;
-	return 0;
-}
-
 static int hp_wmi_set_block(void *data, bool blocked)
 {
 	enum hp_wmi_radio r = (enum hp_wmi_radio) data;
@@ -1009,8 +995,6 @@
 		err = hp_wmi_input_setup();
 		if (err)
 			return err;
-
-		hp_wmi_enable_hotkeys();
 	}
 
 	if (bios_capable) {
diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index 2ac045f..3a1b6bf 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c
@@ -2440,7 +2440,10 @@
 	if (pos < 0)
 		return pos;
 
-	return snprintf(buffer, PAGE_SIZE, "%s\n", pos ? "speed" : "stamina");
+	return snprintf(buffer, PAGE_SIZE, "%s\n",
+					pos == SPEED ? "speed" :
+					pos == STAMINA ? "stamina" :
+					pos == AUTO ? "auto" : "unknown");
 }
 
 static int sony_nc_gfx_switch_setup(struct platform_device *pd,
@@ -4320,7 +4323,8 @@
 		goto err_free_resources;
 	}
 
-	if (sonypi_compat_init())
+	result = sonypi_compat_init();
+	if (result)
 		goto err_remove_input;
 
 	/* request io port */
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index f4f30af..2e8a20c 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1715,11 +1715,13 @@
 		    (mport_id == RIO_MPORT_ANY && port->nscan == scan_ops))
 			port->nscan = NULL;
 
-	list_for_each_entry(scan, &rio_scans, node)
+	list_for_each_entry(scan, &rio_scans, node) {
 		if (scan->mport_id == mport_id) {
 			list_del(&scan->node);
 			kfree(scan);
+			break;
 		}
+	}
 
 	mutex_unlock(&rio_mport_list_lock);
 
diff --git a/drivers/rtc/rtc-stmp3xxx.c b/drivers/rtc/rtc-stmp3xxx.c
index 767fee2..2601953 100644
--- a/drivers/rtc/rtc-stmp3xxx.c
+++ b/drivers/rtc/rtc-stmp3xxx.c
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
+#include <linux/delay.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 #include <linux/of_device.h>
@@ -119,24 +120,39 @@
 }
 #endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */
 
-static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
+static int stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
 {
+	int timeout = 5000; /* 3ms according to i.MX28 Ref Manual */
 	/*
-	 * The datasheet doesn't say which way round the
-	 * NEW_REGS/STALE_REGS bitfields go. In fact it's 0x1=P0,
-	 * 0x2=P1, .., 0x20=P5, 0x40=ALARM, 0x80=SECONDS
+	 * The i.MX28 Applications Processor Reference Manual, Rev. 1, 2010
+	 * states:
+	 * | The order in which registers are updated is
+	 * | Persistent 0, 1, 2, 3, 4, 5, Alarm, Seconds.
+	 * | (This list is in bitfield order, from LSB to MSB, as they would
+	 * | appear in the STALE_REGS and NEW_REGS bitfields of the HW_RTC_STAT
+	 * | register. For example, the Seconds register corresponds to
+	 * | STALE_REGS or NEW_REGS containing 0x80.)
 	 */
-	while (readl(rtc_data->io + STMP3XXX_RTC_STAT) &
-			(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT))
-		cpu_relax();
+	do {
+		if (!(readl(rtc_data->io + STMP3XXX_RTC_STAT) &
+				(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)))
+			return 0;
+		udelay(1);
+	} while (--timeout > 0);
+	return (readl(rtc_data->io + STMP3XXX_RTC_STAT) &
+		(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) ? -ETIME : 0;
 }
 
 /* Time read/write */
 static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 {
+	int ret;
 	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
 
-	stmp3xxx_wait_time(rtc_data);
+	ret = stmp3xxx_wait_time(rtc_data);
+	if (ret)
+		return ret;
+
 	rtc_time_to_tm(readl(rtc_data->io + STMP3XXX_RTC_SECONDS), rtc_tm);
 	return 0;
 }
@@ -146,8 +162,7 @@
 	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
 
 	writel(t, rtc_data->io + STMP3XXX_RTC_SECONDS);
-	stmp3xxx_wait_time(rtc_data);
-	return 0;
+	return stmp3xxx_wait_time(rtc_data);
 }
 
 /* interrupt(s) handler */
diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c
index 02faf3c..c2e80d7 100644
--- a/drivers/rtc/rtc-twl.c
+++ b/drivers/rtc/rtc-twl.c
@@ -524,6 +524,8 @@
 	if (ret < 0)
 		goto out1;
 
+	device_init_wakeup(&pdev->dev, 1);
+
 	rtc = rtc_device_register(pdev->name,
 				  &pdev->dev, &twl_rtc_ops, THIS_MODULE);
 	if (IS_ERR(rtc)) {
@@ -542,7 +544,6 @@
 	}
 
 	platform_set_drvdata(pdev, rtc);
-	device_init_wakeup(&pdev->dev, 1);
 	return 0;
 
 out2:
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 17150a7..451bf99 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2392,6 +2392,12 @@
 		rc = cqr->intrc;
 	else
 		rc = -EIO;
+
+	/* kick tasklets */
+	dasd_schedule_device_bh(device);
+	if (device->block)
+		dasd_schedule_block_bh(device->block);
+
 	return rc;
 }
 
diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c
index 1d4c8fe..c82fe65 100644
--- a/drivers/s390/scsi/zfcp_erp.c
+++ b/drivers/s390/scsi/zfcp_erp.c
@@ -102,10 +102,13 @@
 
 	if (atomic_read(&port->status) & ZFCP_STATUS_COMMON_ERP_INUSE)
 		zfcp_erp_action_dismiss(&port->erp_action);
-	else
-		shost_for_each_device(sdev, port->adapter->scsi_host)
+	else {
+		spin_lock(port->adapter->scsi_host->host_lock);
+		__shost_for_each_device(sdev, port->adapter->scsi_host)
 			if (sdev_to_zfcp(sdev)->port == port)
 				zfcp_erp_action_dismiss_lun(sdev);
+		spin_unlock(port->adapter->scsi_host->host_lock);
+	}
 }
 
 static void zfcp_erp_action_dismiss_adapter(struct zfcp_adapter *adapter)
@@ -592,9 +595,11 @@
 {
 	struct scsi_device *sdev;
 
-	shost_for_each_device(sdev, port->adapter->scsi_host)
+	spin_lock(port->adapter->scsi_host->host_lock);
+	__shost_for_each_device(sdev, port->adapter->scsi_host)
 		if (sdev_to_zfcp(sdev)->port == port)
 			_zfcp_erp_lun_reopen(sdev, clear, id, 0);
+	spin_unlock(port->adapter->scsi_host->host_lock);
 }
 
 static void zfcp_erp_strategy_followup_failed(struct zfcp_erp_action *act)
@@ -1434,8 +1439,10 @@
 		atomic_set_mask(common_mask, &port->status);
 	read_unlock_irqrestore(&adapter->port_list_lock, flags);
 
-	shost_for_each_device(sdev, adapter->scsi_host)
+	spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
+	__shost_for_each_device(sdev, adapter->scsi_host)
 		atomic_set_mask(common_mask, &sdev_to_zfcp(sdev)->status);
+	spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
 }
 
 /**
@@ -1469,11 +1476,13 @@
 	}
 	read_unlock_irqrestore(&adapter->port_list_lock, flags);
 
-	shost_for_each_device(sdev, adapter->scsi_host) {
+	spin_lock_irqsave(adapter->scsi_host->host_lock, flags);
+	__shost_for_each_device(sdev, adapter->scsi_host) {
 		atomic_clear_mask(common_mask, &sdev_to_zfcp(sdev)->status);
 		if (clear_counter)
 			atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
 	}
+	spin_unlock_irqrestore(adapter->scsi_host->host_lock, flags);
 }
 
 /**
@@ -1487,16 +1496,19 @@
 {
 	struct scsi_device *sdev;
 	u32 common_mask = mask & ZFCP_COMMON_FLAGS;
+	unsigned long flags;
 
 	atomic_set_mask(mask, &port->status);
 
 	if (!common_mask)
 		return;
 
-	shost_for_each_device(sdev, port->adapter->scsi_host)
+	spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
+	__shost_for_each_device(sdev, port->adapter->scsi_host)
 		if (sdev_to_zfcp(sdev)->port == port)
 			atomic_set_mask(common_mask,
 					&sdev_to_zfcp(sdev)->status);
+	spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
 }
 
 /**
@@ -1511,6 +1523,7 @@
 	struct scsi_device *sdev;
 	u32 common_mask = mask & ZFCP_COMMON_FLAGS;
 	u32 clear_counter = mask & ZFCP_STATUS_COMMON_ERP_FAILED;
+	unsigned long flags;
 
 	atomic_clear_mask(mask, &port->status);
 
@@ -1520,13 +1533,15 @@
 	if (clear_counter)
 		atomic_set(&port->erp_counter, 0);
 
-	shost_for_each_device(sdev, port->adapter->scsi_host)
+	spin_lock_irqsave(port->adapter->scsi_host->host_lock, flags);
+	__shost_for_each_device(sdev, port->adapter->scsi_host)
 		if (sdev_to_zfcp(sdev)->port == port) {
 			atomic_clear_mask(common_mask,
 					  &sdev_to_zfcp(sdev)->status);
 			if (clear_counter)
 				atomic_set(&sdev_to_zfcp(sdev)->erp_counter, 0);
 		}
+	spin_unlock_irqrestore(port->adapter->scsi_host->host_lock, flags);
 }
 
 /**
diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c
index 665e3cf..de0598e 100644
--- a/drivers/s390/scsi/zfcp_qdio.c
+++ b/drivers/s390/scsi/zfcp_qdio.c
@@ -224,11 +224,9 @@
 
 static int zfcp_qdio_sbal_check(struct zfcp_qdio *qdio)
 {
-	spin_lock_irq(&qdio->req_q_lock);
 	if (atomic_read(&qdio->req_q_free) ||
 	    !(atomic_read(&qdio->adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP))
 		return 1;
-	spin_unlock_irq(&qdio->req_q_lock);
 	return 0;
 }
 
@@ -246,9 +244,8 @@
 {
 	long ret;
 
-	spin_unlock_irq(&qdio->req_q_lock);
-	ret = wait_event_interruptible_timeout(qdio->req_q_wq,
-			       zfcp_qdio_sbal_check(qdio), 5 * HZ);
+	ret = wait_event_interruptible_lock_irq_timeout(qdio->req_q_wq,
+		       zfcp_qdio_sbal_check(qdio), qdio->req_q_lock, 5 * HZ);
 
 	if (!(atomic_read(&qdio->adapter->status) & ZFCP_STATUS_ADAPTER_QDIOUP))
 		return -EIO;
@@ -262,7 +259,6 @@
 		zfcp_erp_adapter_reopen(qdio->adapter, 0, "qdsbg_1");
 	}
 
-	spin_lock_irq(&qdio->req_q_lock);
 	return -EIO;
 }
 
diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c
index 3f01bbf..8906392 100644
--- a/drivers/s390/scsi/zfcp_sysfs.c
+++ b/drivers/s390/scsi/zfcp_sysfs.c
@@ -27,6 +27,16 @@
 static ZFCP_DEV_ATTR(_feat, _name, S_IRUGO,				       \
 		     zfcp_sysfs_##_feat##_##_name##_show, NULL);
 
+#define ZFCP_DEFINE_ATTR_CONST(_feat, _name, _format, _value)		       \
+static ssize_t zfcp_sysfs_##_feat##_##_name##_show(struct device *dev,	       \
+						   struct device_attribute *at,\
+						   char *buf)		       \
+{									       \
+	return sprintf(buf, _format, _value);				       \
+}									       \
+static ZFCP_DEV_ATTR(_feat, _name, S_IRUGO,				       \
+		     zfcp_sysfs_##_feat##_##_name##_show, NULL);
+
 #define ZFCP_DEFINE_A_ATTR(_name, _format, _value)			     \
 static ssize_t zfcp_sysfs_adapter_##_name##_show(struct device *dev,	     \
 						 struct device_attribute *at,\
@@ -75,6 +85,8 @@
 ZFCP_DEFINE_ATTR(zfcp_unit, unit, access_denied, "%d\n",
 		 (zfcp_unit_sdev_status(unit) &
 		  ZFCP_STATUS_COMMON_ACCESS_DENIED) != 0);
+ZFCP_DEFINE_ATTR_CONST(unit, access_shared, "%d\n", 0);
+ZFCP_DEFINE_ATTR_CONST(unit, access_readonly, "%d\n", 0);
 
 static ssize_t zfcp_sysfs_port_failed_show(struct device *dev,
 					   struct device_attribute *attr,
@@ -347,6 +359,8 @@
 	&dev_attr_unit_in_recovery.attr,
 	&dev_attr_unit_status.attr,
 	&dev_attr_unit_access_denied.attr,
+	&dev_attr_unit_access_shared.attr,
+	&dev_attr_unit_access_readonly.attr,
 	NULL
 };
 static struct attribute_group zfcp_unit_attr_group = {
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 48b2918..92ff027 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1353,7 +1353,6 @@
 	tristate "Emulex LightPulse Fibre Channel Support"
 	depends on PCI && SCSI
 	select SCSI_FC_ATTRS
-	select GENERIC_CSUM
 	select CRC_T10DIF
 	help
           This lpfc driver supports the Emulex LightPulse
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index b6d1f92..c18c681 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -38,7 +38,7 @@
 
 #define DRV_NAME		"fnic"
 #define DRV_DESCRIPTION		"Cisco FCoE HBA Driver"
-#define DRV_VERSION		"1.5.0.22"
+#define DRV_VERSION		"1.5.0.23"
 #define PFX			DRV_NAME ": "
 #define DFX                     DRV_NAME "%d: "
 
diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 5f09d18..42e15ee 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c
@@ -642,19 +642,6 @@
 		INIT_WORK(&fnic->fip_frame_work, fnic_handle_fip_frame);
 		INIT_WORK(&fnic->event_work, fnic_handle_event);
 		skb_queue_head_init(&fnic->fip_frame_queue);
-		spin_lock_irqsave(&fnic_list_lock, flags);
-		if (!fnic_fip_queue) {
-			fnic_fip_queue =
-				create_singlethread_workqueue("fnic_fip_q");
-			if (!fnic_fip_queue) {
-				spin_unlock_irqrestore(&fnic_list_lock, flags);
-				printk(KERN_ERR PFX "fnic FIP work queue "
-						 "create failed\n");
-				err = -ENOMEM;
-				goto err_out_free_max_pool;
-			}
-		}
-		spin_unlock_irqrestore(&fnic_list_lock, flags);
 		INIT_LIST_HEAD(&fnic->evlist);
 		INIT_LIST_HEAD(&fnic->vlans);
 	} else {
@@ -960,6 +947,13 @@
 	spin_lock_init(&fnic_list_lock);
 	INIT_LIST_HEAD(&fnic_list);
 
+	fnic_fip_queue = create_singlethread_workqueue("fnic_fip_q");
+	if (!fnic_fip_queue) {
+		printk(KERN_ERR PFX "fnic FIP work queue create failed\n");
+		err = -ENOMEM;
+		goto err_create_fip_workq;
+	}
+
 	fnic_fc_transport = fc_attach_transport(&fnic_fc_functions);
 	if (!fnic_fc_transport) {
 		printk(KERN_ERR PFX "fc_attach_transport error\n");
@@ -978,6 +972,8 @@
 err_pci_register:
 	fc_release_transport(fnic_fc_transport);
 err_fc_transport:
+	destroy_workqueue(fnic_fip_queue);
+err_create_fip_workq:
 	destroy_workqueue(fnic_event_queue);
 err_create_fnic_workq:
 	kmem_cache_destroy(fnic_io_req_cache);
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index 7b08215..99d2930 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -185,7 +185,7 @@
 	cmd_iu->_r_c = 0;
 
 	sci_swab32_cpy(&cmd_iu->cdb, task->ssp_task.cmd->cmnd,
-		       task->ssp_task.cmd->cmd_len / sizeof(u32));
+		       (task->ssp_task.cmd->cmd_len+3) / sizeof(u32));
 }
 
 static void sci_task_request_build_ssp_task_iu(struct isci_request *ireq)
diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c
index 9bb020a..0d30ca8 100644
--- a/drivers/scsi/isci/task.c
+++ b/drivers/scsi/isci/task.c
@@ -491,6 +491,7 @@
 	struct isci_tmf           tmf;
 	int                       ret = TMF_RESP_FUNC_FAILED;
 	unsigned long             flags;
+	int                       target_done_already = 0;
 
 	/* Get the isci_request reference from the task.  Note that
 	 * this check does not depend on the pending request list
@@ -505,9 +506,11 @@
 	/* If task is already done, the request isn't valid */
 	if (!(task->task_state_flags & SAS_TASK_STATE_DONE) &&
 	    (task->task_state_flags & SAS_TASK_AT_INITIATOR) &&
-	    old_request)
+	    old_request) {
 		idev = isci_get_device(task->dev->lldd_dev);
-
+		target_done_already = test_bit(IREQ_COMPLETE_IN_TARGET,
+					       &old_request->flags);
+	}
 	spin_unlock(&task->task_state_lock);
 	spin_unlock_irqrestore(&ihost->scic_lock, flags);
 
@@ -561,7 +564,7 @@
 
 	if (task->task_proto == SAS_PROTOCOL_SMP ||
 	    sas_protocol_ata(task->task_proto) ||
-	    test_bit(IREQ_COMPLETE_IN_TARGET, &old_request->flags) ||
+	    target_done_already ||
 	    test_bit(IDEV_GONE, &idev->flags)) {
 
 		spin_unlock_irqrestore(&ihost->scic_lock, flags);
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 0177295..1f0ca68 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -3547,11 +3547,21 @@
 		break;
 	}
 
-	/*
-	 * We expect the FW state to be READY
-	 */
-	if (megasas_transition_to_ready(instance, 0))
-		goto fail_ready_state;
+	if (megasas_transition_to_ready(instance, 0)) {
+		atomic_set(&instance->fw_reset_no_pci_access, 1);
+		instance->instancet->adp_reset
+			(instance, instance->reg_set);
+		atomic_set(&instance->fw_reset_no_pci_access, 0);
+		dev_info(&instance->pdev->dev,
+			"megasas: FW restarted successfully from %s!\n",
+			__func__);
+
+		/*waitting for about 30 second before retry*/
+		ssleep(30);
+
+		if (megasas_transition_to_ready(instance, 0))
+			goto fail_ready_state;
+	}
 
 	/*
 	 * MSI-X host index 0 is common for all adapter.
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index f14665a..6b1b4e9 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1857,11 +1857,16 @@
 		goto out;
 	}
 
-	/* error info record present */
-	if (unlikely((rx_desc & RXQ_ERR) && (*(u64 *) slot->response))) {
+	/*
+	 * error info record present; slot->response is 32 bit aligned but may
+	 * not be 64 bit aligned, so check for zero in two 32 bit reads
+	 */
+	if (unlikely((rx_desc & RXQ_ERR)
+		     && (*((u32 *)slot->response)
+			 || *(((u32 *)slot->response) + 1)))) {
 		mv_dprintk("port %d slot %d rx_desc %X has error info"
 			"%016llX.\n", slot->port->sas_port.id, slot_idx,
-			 rx_desc, (u64)(*(u64 *)slot->response));
+			 rx_desc, get_unaligned_le64(slot->response));
 		tstat->stat = mvs_slot_err(mvi, task, slot_idx);
 		tstat->resp = SAS_TASK_COMPLETE;
 		goto out;
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index 60e2fb7..d6b19dc 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -39,6 +39,7 @@
 #include <linux/irq.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <asm/unaligned.h>
 #include <scsi/libsas.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_tcq.h>
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index 42ef481..ef0a548 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -419,6 +419,8 @@
 			    __constant_cpu_to_le16(CF_SIMPLE_TAG);
 			break;
 		}
+	} else {
+		cmd_pkt->control_flags = __constant_cpu_to_le16(CF_SIMPLE_TAG);
 	}
 
 	/* Load SCSI command packet. */
@@ -1307,11 +1309,11 @@
 		    fcp_cmnd->task_attribute = TSK_ORDERED;
 		    break;
 		default:
-		    fcp_cmnd->task_attribute = 0;
+		    fcp_cmnd->task_attribute = TSK_SIMPLE;
 		    break;
 		}
 	} else {
-		fcp_cmnd->task_attribute = 0;
+		fcp_cmnd->task_attribute = TSK_SIMPLE;
 	}
 
 	cmd_pkt->fcp_rsp_dseg_len = 0; /* Let response come in status iocb */
@@ -1525,7 +1527,12 @@
 		case ORDERED_QUEUE_TAG:
 			cmd_pkt->task = TSK_ORDERED;
 			break;
+		default:
+		    cmd_pkt->task = TSK_SIMPLE;
+		    break;
 		}
+	} else {
+		cmd_pkt->task = TSK_SIMPLE;
 	}
 
 	/* Load SCSI command packet. */
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 3b1ea34..eaa808e 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -1031,6 +1031,9 @@
 {
 	int i, result;
 
+	if (sdev->skip_vpd_pages)
+		goto fail;
+
 	/* Ask for all the pages supported by this device */
 	result = scsi_vpd_inquiry(sdev, buf, 0, buf_len);
 	if (result)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 80f39b8..86fcf2c 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -838,10 +838,17 @@
 
 static void sd_unprep_fn(struct request_queue *q, struct request *rq)
 {
+	struct scsi_cmnd *SCpnt = rq->special;
+
 	if (rq->cmd_flags & REQ_DISCARD) {
 		free_page((unsigned long)rq->buffer);
 		rq->buffer = NULL;
 	}
+	if (SCpnt->cmnd != rq->cmd) {
+		mempool_free(SCpnt->cmnd, sd_cdb_pool);
+		SCpnt->cmnd = NULL;
+		SCpnt->cmd_len = 0;
+	}
 }
 
 /**
@@ -1720,21 +1727,6 @@
 	if (rq_data_dir(SCpnt->request) == READ && scsi_prot_sg_count(SCpnt))
 		sd_dif_complete(SCpnt, good_bytes);
 
-	if (scsi_host_dif_capable(sdkp->device->host, sdkp->protection_type)
-	    == SD_DIF_TYPE2_PROTECTION && SCpnt->cmnd != SCpnt->request->cmd) {
-
-		/* We have to print a failed command here as the
-		 * extended CDB gets freed before scsi_io_completion()
-		 * is called.
-		 */
-		if (result)
-			scsi_print_command(SCpnt);
-
-		mempool_free(SCpnt->cmnd, sd_cdb_pool);
-		SCpnt->cmnd = NULL;
-		SCpnt->cmd_len = 0;
-	}
-
 	return good_bytes;
 }
 
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 2168258..74b88ef 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -751,7 +751,7 @@
 
 		vscsi->affinity_hint_set = true;
 	} else {
-		for (i = 0; i < vscsi->num_queues - VIRTIO_SCSI_VQ_BASE; i++)
+		for (i = 0; i < vscsi->num_queues; i++)
 			virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1);
 
 		vscsi->affinity_hint_set = false;
diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 222d3e3..707966b 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -609,7 +609,7 @@
 		else
 			buf = (void *)t->tx_buf;
 		t->tx_dma = dma_map_single(&spi->dev, buf,
-				t->len, DMA_FROM_DEVICE);
+				t->len, DMA_TO_DEVICE);
 		if (!t->tx_dma) {
 			ret = -EFAULT;
 			goto err_tx_map;
diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c
index 080abf2..a8c3444 100644
--- a/drivers/staging/android/logger.c
+++ b/drivers/staging/android/logger.c
@@ -469,7 +469,7 @@
 			 unsigned long nr_segs, loff_t ppos)
 {
 	struct logger_log *log = file_get_log(iocb->ki_filp);
-	size_t orig = log->w_off;
+	size_t orig;
 	struct logger_entry header;
 	struct timespec now;
 	ssize_t ret = 0;
@@ -490,6 +490,8 @@
 
 	mutex_lock(&log->mutex);
 
+	orig = log->w_off;
+
 	/*
 	 * Fix up any readers, pulling them forward to the first readable
 	 * entry after (what will be) the new write offset. We do this now
diff --git a/drivers/staging/comedi/TODO b/drivers/staging/comedi/TODO
index b10f739..fa8da9a 100644
--- a/drivers/staging/comedi/TODO
+++ b/drivers/staging/comedi/TODO
@@ -9,4 +9,4 @@
 Please send patches to Greg Kroah-Hartman <greg@kroah.com> and
 copy:
 	Ian Abbott <abbotti@mev.co.uk>
-	Frank Mori Hess <fmhess@users.sourceforge.net>
+	H Hartley Sweeten <hsweeten@visionengravers.com>
diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c
index 8647518..f4a197b 100644
--- a/drivers/staging/comedi/comedi_fops.c
+++ b/drivers/staging/comedi/comedi_fops.c
@@ -1413,22 +1413,19 @@
 		DPRINTK("subdevice busy\n");
 		return -EBUSY;
 	}
-	s->busy = file;
 
 	/* make sure channel/gain list isn't too long */
 	if (cmd.chanlist_len > s->len_chanlist) {
 		DPRINTK("channel/gain list too long %u > %d\n",
 			cmd.chanlist_len, s->len_chanlist);
-		ret = -EINVAL;
-		goto cleanup;
+		return -EINVAL;
 	}
 
 	/* make sure channel/gain list isn't too short */
 	if (cmd.chanlist_len < 1) {
 		DPRINTK("channel/gain list too short %u < 1\n",
 			cmd.chanlist_len);
-		ret = -EINVAL;
-		goto cleanup;
+		return -EINVAL;
 	}
 
 	async->cmd = cmd;
@@ -1438,8 +1435,7 @@
 	    kmalloc(async->cmd.chanlist_len * sizeof(int), GFP_KERNEL);
 	if (!async->cmd.chanlist) {
 		DPRINTK("allocation failed\n");
-		ret = -ENOMEM;
-		goto cleanup;
+		return -ENOMEM;
 	}
 
 	if (copy_from_user(async->cmd.chanlist, user_chanlist,
@@ -1491,6 +1487,9 @@
 
 	comedi_set_subdevice_runflags(s, ~0, SRF_USER | SRF_RUNNING);
 
+	/* set s->busy _after_ setting SRF_RUNNING flag to avoid race with
+	 * comedi_read() or comedi_write() */
+	s->busy = file;
 	ret = s->do_cmd(dev, s);
 	if (ret == 0)
 		return 0;
@@ -1705,6 +1704,7 @@
 			   void *file)
 {
 	struct comedi_subdevice *s;
+	int ret;
 
 	if (arg >= dev->n_subdevices)
 		return -EINVAL;
@@ -1721,7 +1721,11 @@
 	if (s->busy != file)
 		return -EBUSY;
 
-	return do_cancel(dev, s);
+	ret = do_cancel(dev, s);
+	if (comedi_get_subdevice_runflags(s) & SRF_USER)
+		wake_up_interruptible(&s->async->wait_head);
+
+	return ret;
 }
 
 /*
@@ -2053,11 +2057,13 @@
 
 		if (!comedi_is_subdevice_running(s)) {
 			if (count == 0) {
+				mutex_lock(&dev->mutex);
 				if (comedi_is_subdevice_in_error(s))
 					retval = -EPIPE;
 				else
 					retval = 0;
 				do_become_nonbusy(dev, s);
+				mutex_unlock(&dev->mutex);
 			}
 			break;
 		}
@@ -2156,11 +2162,13 @@
 
 		if (n == 0) {
 			if (!comedi_is_subdevice_running(s)) {
+				mutex_lock(&dev->mutex);
 				do_become_nonbusy(dev, s);
 				if (comedi_is_subdevice_in_error(s))
 					retval = -EPIPE;
 				else
 					retval = 0;
+				mutex_unlock(&dev->mutex);
 				break;
 			}
 			if (file->f_flags & O_NONBLOCK) {
@@ -2198,9 +2206,11 @@
 		buf += n;
 		break;		/* makes device work like a pipe */
 	}
-	if (comedi_is_subdevice_idle(s) &&
-	    async->buf_read_count - async->buf_write_count == 0) {
-		do_become_nonbusy(dev, s);
+	if (comedi_is_subdevice_idle(s)) {
+		mutex_lock(&dev->mutex);
+		if (async->buf_read_count - async->buf_write_count == 0)
+			do_become_nonbusy(dev, s);
+		mutex_unlock(&dev->mutex);
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&async->wait_head, &wait);
diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c
index e25eba5..b3b5125 100644
--- a/drivers/staging/comedi/drivers.c
+++ b/drivers/staging/comedi/drivers.c
@@ -482,7 +482,7 @@
 		ret = comedi_device_postconfig(dev);
 	if (ret < 0) {
 		comedi_device_detach(dev);
-		module_put(dev->driver->module);
+		module_put(driv->module);
 	}
 	/* On success, the driver module count has been incremented. */
 	return ret;
diff --git a/drivers/staging/frontier/alphatrack.c b/drivers/staging/frontier/alphatrack.c
index 5590ebf..817f837 100644
--- a/drivers/staging/frontier/alphatrack.c
+++ b/drivers/staging/frontier/alphatrack.c
@@ -827,11 +827,11 @@
 		mutex_unlock(&dev->mtx);
 		usb_alphatrack_delete(dev);
 	} else {
+		atomic_set(&dev->writes_pending, 0);
 		dev->intf = NULL;
 		mutex_unlock(&dev->mtx);
 	}
 
-	atomic_set(&dev->writes_pending, 0);
 	mutex_unlock(&disconnect_mutex);
 
 	dev_info(&intf->dev, "Alphatrack Surface #%d now disconnected\n",
diff --git a/drivers/staging/gdm72xx/gdm_qos.c b/drivers/staging/gdm72xx/gdm_qos.c
index b795353..cc36924 100644
--- a/drivers/staging/gdm72xx/gdm_qos.c
+++ b/drivers/staging/gdm72xx/gdm_qos.c
@@ -250,8 +250,8 @@
 
 	list_for_each_entry_safe(entry, n, head, list) {
 		list_del(&entry->list);
-		free_qos_entry(entry);
 		gdm_wimax_send_tx(entry->skb, entry->dev);
+		free_qos_entry(entry);
 	}
 }
 
diff --git a/drivers/staging/imx-drm/Kconfig b/drivers/staging/imx-drm/Kconfig
index 2233905..bd0f2fd 100644
--- a/drivers/staging/imx-drm/Kconfig
+++ b/drivers/staging/imx-drm/Kconfig
@@ -33,7 +33,6 @@
 config DRM_IMX_LDB
 	tristate "Support for LVDS displays"
 	depends on DRM_IMX
-	select OF_VIDEOMODE
 	help
 	  Choose this to enable the internal LVDS Display Bridge (LDB)
 	  found on i.MX53 and i.MX6 processors.
diff --git a/drivers/staging/imx-drm/imx-drm-core.c b/drivers/staging/imx-drm/imx-drm-core.c
index 9854a1d..e826086 100644
--- a/drivers/staging/imx-drm/imx-drm-core.c
+++ b/drivers/staging/imx-drm/imx-drm-core.c
@@ -69,28 +69,20 @@
 	struct module				*owner;
 };
 
-static int imx_drm_driver_firstopen(struct drm_device *drm)
-{
-	if (!imx_drm_device_get())
-		return -EINVAL;
-
-	return 0;
-}
-
 static void imx_drm_driver_lastclose(struct drm_device *drm)
 {
 	struct imx_drm_device *imxdrm = drm->dev_private;
 
 	if (imxdrm->fbhelper)
 		drm_fbdev_cma_restore_mode(imxdrm->fbhelper);
-
-	imx_drm_device_put();
 }
 
 static int imx_drm_driver_unload(struct drm_device *drm)
 {
 	struct imx_drm_device *imxdrm = drm->dev_private;
 
+	imx_drm_device_put();
+
 	drm_mode_config_cleanup(imxdrm->drm);
 	drm_kms_helper_poll_fini(imxdrm->drm);
 
@@ -207,7 +199,6 @@
 	.unlocked_ioctl = drm_ioctl,
 	.mmap = drm_gem_cma_mmap,
 	.poll = drm_poll,
-	.fasync = drm_fasync,
 	.read = drm_read,
 	.llseek = noop_llseek,
 };
@@ -226,8 +217,6 @@
 	struct imx_drm_connector *con;
 	struct imx_drm_crtc *crtc;
 
-	mutex_lock(&imxdrm->mutex);
-
 	list_for_each_entry(enc, &imxdrm->encoder_list, list) {
 		if (!try_module_get(enc->owner)) {
 			dev_err(imxdrm->dev, "could not get module %s\n",
@@ -254,8 +243,6 @@
 
 	imxdrm->references++;
 
-	mutex_unlock(&imxdrm->mutex);
-
 	return imxdrm->drm;
 
 unwind_crtc:
@@ -447,6 +434,9 @@
 	 */
 	imxdrm->drm->vblank_disable_allowed = 1;
 
+	if (!imx_drm_device_get())
+		ret = -EINVAL;
+
 	ret = 0;
 
 err_init:
@@ -783,7 +773,7 @@
 }
 EXPORT_SYMBOL_GPL(imx_drm_remove_connector);
 
-static struct drm_ioctl_desc imx_drm_ioctls[] = {
+static const struct drm_ioctl_desc imx_drm_ioctls[] = {
 	/* none so far */
 };
 
@@ -791,13 +781,12 @@
 	.driver_features	= DRIVER_MODESET | DRIVER_GEM,
 	.load			= imx_drm_driver_load,
 	.unload			= imx_drm_driver_unload,
-	.firstopen		= imx_drm_driver_firstopen,
 	.lastclose		= imx_drm_driver_lastclose,
 	.gem_free_object	= drm_gem_cma_free_object,
 	.gem_vm_ops		= &drm_gem_cma_vm_ops,
 	.dumb_create		= drm_gem_cma_dumb_create,
 	.dumb_map_offset	= drm_gem_cma_dumb_map_offset,
-	.dumb_destroy		= drm_gem_cma_dumb_destroy,
+	.dumb_destroy		= drm_gem_dumb_destroy,
 
 	.get_vblank_counter	= drm_vblank_count,
 	.enable_vblank		= imx_drm_enable_vblank,
diff --git a/drivers/staging/imx-drm/ipuv3-crtc.c b/drivers/staging/imx-drm/ipuv3-crtc.c
index 9176a81..e39690a 100644
--- a/drivers/staging/imx-drm/ipuv3-crtc.c
+++ b/drivers/staging/imx-drm/ipuv3-crtc.c
@@ -129,7 +129,8 @@
 
 static int ipu_page_flip(struct drm_crtc *crtc,
 		struct drm_framebuffer *fb,
-		struct drm_pending_vblank_event *event)
+		struct drm_pending_vblank_event *event,
+		uint32_t page_flip_flags)
 {
 	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
 	int ret;
diff --git a/drivers/staging/tidspbridge/pmgr/dbll.c b/drivers/staging/tidspbridge/pmgr/dbll.c
index c191ae2..41e88ab 100644
--- a/drivers/staging/tidspbridge/pmgr/dbll.c
+++ b/drivers/staging/tidspbridge/pmgr/dbll.c
@@ -1120,8 +1120,11 @@
 	   or DYN_EXTERNAL, then mem granularity information is present
 	   within the section name - only process if there are at least three
 	   tokens within the section name (just a minor optimization) */
-	if (count >= 3)
-		strict_strtol(sz_last_token, 10, (long *)&req);
+	if (count >= 3) {
+		status = kstrtos32(sz_last_token, 10, &req);
+		if (status)
+			goto func_cont;
+	}
 
 	if ((req == 0) || (req == 1)) {
 		if (strcmp(sz_sec_last_token, "DYN_DARAM") == 0) {
diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index dcceed2..81972fa 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -1811,10 +1811,12 @@
 #else
 	if (*zcache_comp_name != '\0') {
 		ret = crypto_has_comp(zcache_comp_name, 0, 0);
-		if (!ret)
+		if (!ret) {
 			pr_info("zcache: %s not supported\n",
 					zcache_comp_name);
-		goto out;
+			ret = 1;
+			goto out;
+		}
 	}
 	if (!ret)
 		strcpy(zcache_comp_name, "lzo");
diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c
index 82c7202..e77fb6e 100644
--- a/drivers/staging/zram/zram_drv.c
+++ b/drivers/staging/zram/zram_drv.c
@@ -527,8 +527,11 @@
 	size_t index;
 	struct zram_meta *meta;
 
-	if (!zram->init_done)
+	down_write(&zram->init_lock);
+	if (!zram->init_done) {
+		up_write(&zram->init_lock);
 		return;
+	}
 
 	meta = zram->meta;
 	zram->init_done = 0;
@@ -549,6 +552,7 @@
 
 	zram->disksize = 0;
 	set_capacity(zram->disk, 0);
+	up_write(&zram->init_lock);
 }
 
 static void zram_init_device(struct zram *zram, struct zram_meta *meta)
diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c
index 5de56f6..f36950e 100644
--- a/drivers/thermal/x86_pkg_temp_thermal.c
+++ b/drivers/thermal/x86_pkg_temp_thermal.c
@@ -54,6 +54,8 @@
 * is some wrong values returned by cpuid for number of thresholds.
 */
 #define MAX_NUMBER_OF_TRIPS	2
+/* Limit number of package temp zones */
+#define MAX_PKG_TEMP_ZONE_IDS	256
 
 struct phy_dev_entry {
 	struct list_head list;
@@ -394,12 +396,16 @@
 	char buffer[30];
 	int thres_count;
 	u32 eax, ebx, ecx, edx;
+	u8 *temp;
 
 	cpuid(6, &eax, &ebx, &ecx, &edx);
 	thres_count = ebx & 0x07;
 	if (!thres_count)
 		return -ENODEV;
 
+	if (topology_physical_package_id(cpu) > MAX_PKG_TEMP_ZONE_IDS)
+		return -ENODEV;
+
 	thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
 
 	err = get_tj_max(cpu, &tj_max);
@@ -417,13 +423,14 @@
 	spin_lock(&pkg_work_lock);
 	if (topology_physical_package_id(cpu) > max_phy_id)
 		max_phy_id = topology_physical_package_id(cpu);
-	pkg_work_scheduled = krealloc(pkg_work_scheduled,
-				(max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
-	if (!pkg_work_scheduled) {
+	temp = krealloc(pkg_work_scheduled,
+			(max_phy_id+1) * sizeof(u8), GFP_ATOMIC);
+	if (!temp) {
 		spin_unlock(&pkg_work_lock);
 		err = -ENOMEM;
 		goto err_ret_free;
 	}
+	pkg_work_scheduled = temp;
 	pkg_work_scheduled[topology_physical_package_id(cpu)] = 0;
 	spin_unlock(&pkg_work_lock);
 
@@ -511,7 +518,7 @@
 
 	/* Check if there is already an instance for this package */
 	if (!phdev) {
-		if (!cpu_has(c, X86_FEATURE_DTHERM) &&
+		if (!cpu_has(c, X86_FEATURE_DTHERM) ||
 					!cpu_has(c, X86_FEATURE_PTS))
 			return -ENODEV;
 		if (pkg_temp_thermal_device_add(cpu))
@@ -562,7 +569,7 @@
 };
 
 static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
-	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM },
+	{ X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_PTS },
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
@@ -592,7 +599,6 @@
 	return 0;
 
 err_ret:
-	get_online_cpus();
 	for_each_online_cpu(i)
 		put_core_offline(i);
 	put_online_cpus();
diff --git a/drivers/tty/serial/8250/8250_early.c b/drivers/tty/serial/8250/8250_early.c
index 721904f..946ddd2 100644
--- a/drivers/tty/serial/8250/8250_early.c
+++ b/drivers/tty/serial/8250/8250_early.c
@@ -193,7 +193,8 @@
 	if (options) {
 		options++;
 		device->baud = simple_strtoul(options, NULL, 0);
-		length = min(strcspn(options, " "), sizeof(device->options));
+		length = min(strcspn(options, " ") + 1,
+			     sizeof(device->options));
 		strlcpy(device->options, options, length);
 	} else {
 		device->baud = probe_baud(port);
diff --git a/drivers/tty/serial/8250/8250_gsc.c b/drivers/tty/serial/8250/8250_gsc.c
index bb91b47..2e3ea1a 100644
--- a/drivers/tty/serial/8250/8250_gsc.c
+++ b/drivers/tty/serial/8250/8250_gsc.c
@@ -31,9 +31,8 @@
 	int err;
 
 #ifdef CONFIG_64BIT
-	extern int iosapic_serial_irq(int cellnum);
 	if (!dev->irq && (dev->id.sversion == 0xad))
-		dev->irq = iosapic_serial_irq(dev->mod_index-1);
+		dev->irq = iosapic_serial_irq(dev);
 #endif
 
 	if (!dev->irq) {
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 5e3d689..1456673 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -277,7 +277,7 @@
 	select SERIAL_CORE
 	help
 	  Support for the on-chip UARTs on the NVIDIA Tegra series SOCs
-	  providing /dev/ttyHS0, 1, 2, 3 and 4 (note, some machines may not
+	  providing /dev/ttyTHS0, 1, 2, 3 and 4 (note, some machines may not
 	  provide all of these ports, depending on how the serial port
 	  are enabled). This driver uses the APB DMA to achieve higher baudrate
 	  and better performance.
diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c
index cbf1d15..22f280a 100644
--- a/drivers/tty/serial/arc_uart.c
+++ b/drivers/tty/serial/arc_uart.c
@@ -773,6 +773,6 @@
 module_exit(arc_serial_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("plat-arcfpga/uart");
+MODULE_ALIAS("platform:" DRIVER_NAME);
 MODULE_AUTHOR("Vineet Gupta");
 MODULE_DESCRIPTION("ARC(Synopsys) On-Chip(fpga) serial driver");
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index 4f5f161..f85b8e6 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -678,11 +678,18 @@
 
 static irqreturn_t mxs_auart_irq_handle(int irq, void *context)
 {
-	u32 istatus, istat;
+	u32 istat;
 	struct mxs_auart_port *s = context;
 	u32 stat = readl(s->port.membase + AUART_STAT);
 
-	istatus = istat = readl(s->port.membase + AUART_INTR);
+	istat = readl(s->port.membase + AUART_INTR);
+
+	/* ack irq */
+	writel(istat & (AUART_INTR_RTIS
+		| AUART_INTR_TXIS
+		| AUART_INTR_RXIS
+		| AUART_INTR_CTSMIS),
+			s->port.membase + AUART_INTR_CLR);
 
 	if (istat & AUART_INTR_CTSMIS) {
 		uart_handle_cts_change(&s->port, stat & AUART_STAT_CTS);
@@ -702,12 +709,6 @@
 		istat &= ~AUART_INTR_TXIS;
 	}
 
-	writel(istatus & (AUART_INTR_RTIS
-		| AUART_INTR_TXIS
-		| AUART_INTR_RXIS
-		| AUART_INTR_CTSMIS),
-			s->port.membase + AUART_INTR_CLR);
-
 	return IRQ_HANDLED;
 }
 
@@ -850,7 +851,7 @@
 	struct mxs_auart_port *s;
 	struct uart_port *port;
 	unsigned int old_ctrl0, old_ctrl2;
-	unsigned int to = 1000;
+	unsigned int to = 20000;
 
 	if (co->index >= MXS_AUART_PORTS || co->index < 0)
 		return;
@@ -871,18 +872,23 @@
 
 	uart_console_write(port, str, count, mxs_auart_console_putchar);
 
-	/*
-	 * Finally, wait for transmitter to become empty
-	 * and restore the TCR
-	 */
+	/* Finally, wait for transmitter to become empty ... */
 	while (readl(port->membase + AUART_STAT) & AUART_STAT_BUSY) {
+		udelay(1);
 		if (!to--)
 			break;
-		udelay(1);
 	}
 
-	writel(old_ctrl0, port->membase + AUART_CTRL0);
-	writel(old_ctrl2, port->membase + AUART_CTRL2);
+	/*
+	 * ... and restore the TCR if we waited long enough for the transmitter
+	 * to be idle. This might keep the transmitter enabled although it is
+	 * unused, but that is better than to disable it while it is still
+	 * transmitting.
+	 */
+	if (!(readl(port->membase + AUART_STAT) & AUART_STAT_BUSY)) {
+		writel(old_ctrl0, port->membase + AUART_CTRL0);
+		writel(old_ctrl2, port->membase + AUART_CTRL2);
+	}
 
 	clk_disable(s->clk);
 }
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index ff17138..dc6e969 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -3478,7 +3478,7 @@
 	for ( i = 0; i < info->rx_buf_count; i++ ) {
 		/* calculate and store physical address of this buffer entry */
 		info->rx_buf_list_ex[i].phys_entry =
-			info->buffer_list_phys + (i * sizeof(SCABUFSIZE));
+			info->buffer_list_phys + (i * SCABUFSIZE);
 
 		/* calculate and store physical address of */
 		/* next entry in cirular list of entries */
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index 121aeb9..f597e88 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -256,10 +256,9 @@
 {
 	struct tty_struct *tty = tty_port_tty_get(port);
 
-	if (tty && (!check_clocal || !C_CLOCAL(tty))) {
+	if (tty && (!check_clocal || !C_CLOCAL(tty)))
 		tty_hangup(tty);
-		tty_kref_put(tty);
-	}
+	tty_kref_put(tty);
 }
 EXPORT_SYMBOL_GPL(tty_port_tty_hangup);
 
diff --git a/drivers/usb/chipidea/Kconfig b/drivers/usb/chipidea/Kconfig
index eb2aa2e..d1bd8ef 100644
--- a/drivers/usb/chipidea/Kconfig
+++ b/drivers/usb/chipidea/Kconfig
@@ -12,7 +12,7 @@
 
 config USB_CHIPIDEA_UDC
 	bool "ChipIdea device controller"
-	depends on USB_GADGET=y || USB_CHIPIDEA=m
+	depends on USB_GADGET=y || (USB_CHIPIDEA=m && USB_GADGET=m)
 	help
 	  Say Y here to enable device controller functionality of the
 	  ChipIdea driver.
@@ -20,7 +20,7 @@
 config USB_CHIPIDEA_HOST
 	bool "ChipIdea host controller"
 	depends on USB=y
-	depends on USB_EHCI_HCD=y || USB_CHIPIDEA=m
+	depends on USB_EHCI_HCD=y || (USB_CHIPIDEA=m && USB_EHCI_HCD=m)
 	select USB_EHCI_ROOT_HUB_TT
 	help
 	  Say Y here to enable host controller functionality of the
diff --git a/drivers/usb/chipidea/bits.h b/drivers/usb/chipidea/bits.h
index aefa026..1b23e35 100644
--- a/drivers/usb/chipidea/bits.h
+++ b/drivers/usb/chipidea/bits.h
@@ -50,7 +50,7 @@
 #define PORTSC_PTC            (0x0FUL << 16)
 /* PTS and PTW for non lpm version only */
 #define PORTSC_PTS(d)						\
-	((((d) & 0x3) << 30) | (((d) & 0x4) ? BIT(25) : 0))
+	(u32)((((d) & 0x3) << 30) | (((d) & 0x4) ? BIT(25) : 0))
 #define PORTSC_PTW            BIT(28)
 #define PORTSC_STS            BIT(29)
 
@@ -59,7 +59,7 @@
 #define DEVLC_PSPD_HS         (0x02UL << 25)
 #define DEVLC_PTW             BIT(27)
 #define DEVLC_STS             BIT(28)
-#define DEVLC_PTS(d)          (((d) & 0x7) << 29)
+#define DEVLC_PTS(d)          (u32)(((d) & 0x7) << 29)
 
 /* Encoding for DEVLC_PTS and PORTSC_PTS */
 #define PTS_UTMI              0
diff --git a/drivers/usb/class/usbtmc.c b/drivers/usb/class/usbtmc.c
index 609dbc2..83b4ef4 100644
--- a/drivers/usb/class/usbtmc.c
+++ b/drivers/usb/class/usbtmc.c
@@ -1119,11 +1119,11 @@
 	/* Determine if it is a Rigol or not */
 	data->rigol_quirk = 0;
 	dev_dbg(&intf->dev, "Trying to find if device Vendor 0x%04X Product 0x%04X has the RIGOL quirk\n",
-		data->usb_dev->descriptor.idVendor,
-		data->usb_dev->descriptor.idProduct);
+		le16_to_cpu(data->usb_dev->descriptor.idVendor),
+		le16_to_cpu(data->usb_dev->descriptor.idProduct));
 	for(n = 0; usbtmc_id_quirk[n].idVendor > 0; n++) {
-		if ((usbtmc_id_quirk[n].idVendor == data->usb_dev->descriptor.idVendor) &&
-		    (usbtmc_id_quirk[n].idProduct == data->usb_dev->descriptor.idProduct)) {
+		if ((usbtmc_id_quirk[n].idVendor == le16_to_cpu(data->usb_dev->descriptor.idVendor)) &&
+		    (usbtmc_id_quirk[n].idProduct == le16_to_cpu(data->usb_dev->descriptor.idProduct))) {
 			dev_dbg(&intf->dev, "Setting this device as having the RIGOL quirk\n");
 			data->rigol_quirk = 1;
 			break;
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 4191db3..558313d 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -668,6 +668,15 @@
 static inline int
 hub_clear_tt_buffer (struct usb_device *hdev, u16 devinfo, u16 tt)
 {
+	/* Need to clear both directions for control ep */
+	if (((devinfo >> 11) & USB_ENDPOINT_XFERTYPE_MASK) ==
+			USB_ENDPOINT_XFER_CONTROL) {
+		int status = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0),
+				HUB_CLEAR_TT_BUFFER, USB_RT_PORT,
+				devinfo ^ 0x8000, tt, NULL, 0, 1000);
+		if (status)
+			return status;
+	}
 	return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0),
 			       HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo,
 			       tt, NULL, 0, 1000);
@@ -2848,6 +2857,15 @@
 				USB_CTRL_SET_TIMEOUT);
 }
 
+/* Count of wakeup-enabled devices at or below udev */
+static unsigned wakeup_enabled_descendants(struct usb_device *udev)
+{
+	struct usb_hub *hub = usb_hub_to_struct_hub(udev);
+
+	return udev->do_remote_wakeup +
+			(hub ? hub->wakeup_enabled_descendants : 0);
+}
+
 /*
  * usb_port_suspend - suspend a usb device's upstream port
  * @udev: device that's no longer in active use, not a root hub
@@ -2888,8 +2906,8 @@
  * Linux (2.6) currently has NO mechanisms to initiate that:  no khubd
  * timer, no SRP, no requests through sysfs.
  *
- * If Runtime PM isn't enabled or used, non-SuperSpeed devices really get
- * suspended only when their bus goes into global suspend (i.e., the root
+ * If Runtime PM isn't enabled or used, non-SuperSpeed devices may not get
+ * suspended until their bus goes into global suspend (i.e., the root
  * hub is suspended).  Nevertheless, we change @udev->state to
  * USB_STATE_SUSPENDED as this is the device's "logical" state.  The actual
  * upstream port setting is stored in @udev->port_is_suspended.
@@ -2960,15 +2978,21 @@
 	/* see 7.1.7.6 */
 	if (hub_is_superspeed(hub->hdev))
 		status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3);
-	else if (PMSG_IS_AUTO(msg))
-		status = set_port_feature(hub->hdev, port1,
-						USB_PORT_FEAT_SUSPEND);
+
 	/*
 	 * For system suspend, we do not need to enable the suspend feature
 	 * on individual USB-2 ports.  The devices will automatically go
 	 * into suspend a few ms after the root hub stops sending packets.
 	 * The USB 2.0 spec calls this "global suspend".
+	 *
+	 * However, many USB hubs have a bug: They don't relay wakeup requests
+	 * from a downstream port if the port's suspend feature isn't on.
+	 * Therefore we will turn on the suspend feature if udev or any of its
+	 * descendants is enabled for remote wakeup.
 	 */
+	else if (PMSG_IS_AUTO(msg) || wakeup_enabled_descendants(udev) > 0)
+		status = set_port_feature(hub->hdev, port1,
+				USB_PORT_FEAT_SUSPEND);
 	else {
 		really_suspend = false;
 		status = 0;
@@ -3003,15 +3027,16 @@
 		if (!PMSG_IS_AUTO(msg))
 			status = 0;
 	} else {
-		/* device has up to 10 msec to fully suspend */
 		dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n",
 				(PMSG_IS_AUTO(msg) ? "auto-" : ""),
 				udev->do_remote_wakeup);
-		usb_set_device_state(udev, USB_STATE_SUSPENDED);
 		if (really_suspend) {
 			udev->port_is_suspended = 1;
+
+			/* device has up to 10 msec to fully suspend */
 			msleep(10);
 		}
+		usb_set_device_state(udev, USB_STATE_SUSPENDED);
 	}
 
 	/*
@@ -3293,7 +3318,11 @@
 	unsigned		port1;
 	int			status;
 
-	/* Warn if children aren't already suspended */
+	/*
+	 * Warn if children aren't already suspended.
+	 * Also, add up the number of wakeup-enabled descendants.
+	 */
+	hub->wakeup_enabled_descendants = 0;
 	for (port1 = 1; port1 <= hdev->maxchild; port1++) {
 		struct usb_device	*udev;
 
@@ -3303,6 +3332,9 @@
 			if (PMSG_IS_AUTO(msg))
 				return -EBUSY;
 		}
+		if (udev)
+			hub->wakeup_enabled_descendants +=
+					wakeup_enabled_descendants(udev);
 	}
 
 	if (hdev->do_remote_wakeup && hub->quirk_check_port_auto_suspend) {
@@ -4766,7 +4798,8 @@
 					hub->ports[i - 1]->child;
 
 				dev_dbg(hub_dev, "warm reset port %d\n", i);
-				if (!udev) {
+				if (!udev || !(portstatus &
+						USB_PORT_STAT_CONNECTION)) {
 					status = hub_port_reset(hub, i,
 							NULL, HUB_BH_RESET_TIME,
 							true);
@@ -4776,8 +4809,8 @@
 					usb_lock_device(udev);
 					status = usb_reset_device(udev);
 					usb_unlock_device(udev);
+					connect_change = 0;
 				}
-				connect_change = 0;
 			}
 
 			if (connect_change)
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index 6508e02..4e4790d 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -59,6 +59,9 @@
 	struct usb_tt		tt;		/* Transaction Translator */
 
 	unsigned		mA_per_port;	/* current for each child */
+#ifdef	CONFIG_PM
+	unsigned		wakeup_enabled_descendants;
+#endif
 
 	unsigned		limited_power:1;
 	unsigned		quiescing:1;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index a635988..5b44cd4 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -78,6 +78,12 @@
 	{ USB_DEVICE(0x04d8, 0x000c), .driver_info =
 			USB_QUIRK_CONFIG_INTF_STRINGS },
 
+	/* CarrolTouch 4000U */
+	{ USB_DEVICE(0x04e7, 0x0009), .driver_info = USB_QUIRK_RESET_RESUME },
+
+	/* CarrolTouch 4500U */
+	{ USB_DEVICE(0x04e7, 0x0030), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	/* Samsung Android phone modem - ID conflict with SPH-I500 */
 	{ USB_DEVICE(0x04e8, 0x6601), .driver_info =
 			USB_QUIRK_CONFIG_INTF_STRINGS },
diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index 757aa18..2378958 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -1,6 +1,6 @@
 config USB_DWC3
 	tristate "DesignWare USB3 DRD Core Support"
-	depends on (USB || USB_GADGET) && GENERIC_HARDIRQS
+	depends on (USB || USB_GADGET) && GENERIC_HARDIRQS && HAS_DMA
 	select USB_XHCI_PLATFORM if USB_SUPPORT && USB_XHCI_HCD
 	help
 	  Say Y or M here if your system has a Dual Role SuperSpeed
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index c35d49d..358375e 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -450,7 +450,7 @@
 	}
 
 	if (IS_ERR(dwc->usb3_phy)) {
-		ret = PTR_ERR(dwc->usb2_phy);
+		ret = PTR_ERR(dwc->usb3_phy);
 
 		/*
 		 * if -ENXIO is returned, it means PHY layer wasn't
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index b69d322..27dad99 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -759,8 +759,8 @@
 
 struct dwc3_event_type {
 	u32	is_devspec:1;
-	u32	type:6;
-	u32	reserved8_31:25;
+	u32	type:7;
+	u32	reserved8_31:24;
 } __packed;
 
 #define DWC3_DEPEVT_XFERCOMPLETE	0x01
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index b5e5b35..f77083f 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1584,6 +1584,7 @@
 	__dwc3_gadget_ep_disable(dwc->eps[0]);
 
 err0:
+	dwc->gadget_driver = NULL;
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return ret;
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 62f6802..8e93683 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -193,6 +193,7 @@
 	   Faraday usb device controller FUSB300 driver
 
 config USB_FOTG210_UDC
+	depends on HAS_DMA
 	tristate "Faraday FOTG210 USB Peripheral Controller"
 	help
 	   Faraday USB2.0 OTG controller which can be configured as
@@ -328,13 +329,14 @@
 
 config USB_MV_UDC
 	tristate "Marvell USB2.0 Device Controller"
-	depends on GENERIC_HARDIRQS
+	depends on GENERIC_HARDIRQS && HAS_DMA
 	help
 	  Marvell Socs (including PXA and MMP series) include a high speed
 	  USB2.0 OTG controller, which can be configured as high speed or
 	  full speed USB peripheral.
 
 config USB_MV_U3D
+	depends on HAS_DMA
 	tristate "MARVELL PXA2128 USB 3.0 controller"
 	help
 	  MARVELL PXA2128 Processor series include a super speed USB3.0 device
@@ -639,6 +641,7 @@
 	depends on USB_CONFIGFS
 	depends on NET
 	select USB_U_ETHER
+	select USB_U_RNDIS
 	select USB_F_RNDIS
 	help
 	   Microsoft Windows XP bundles the "Remote NDIS" (RNDIS) protocol,
diff --git a/drivers/usb/gadget/at91_udc.c b/drivers/usb/gadget/at91_udc.c
index 073b938..d9a6add0 100644
--- a/drivers/usb/gadget/at91_udc.c
+++ b/drivers/usb/gadget/at91_udc.c
@@ -870,8 +870,8 @@
 	if (udc->clocked)
 		return;
 	udc->clocked = 1;
-	clk_enable(udc->iclk);
-	clk_enable(udc->fclk);
+	clk_prepare_enable(udc->iclk);
+	clk_prepare_enable(udc->fclk);
 }
 
 static void clk_off(struct at91_udc *udc)
@@ -880,8 +880,8 @@
 		return;
 	udc->clocked = 0;
 	udc->gadget.speed = USB_SPEED_UNKNOWN;
-	clk_disable(udc->fclk);
-	clk_disable(udc->iclk);
+	clk_disable_unprepare(udc->fclk);
+	clk_disable_unprepare(udc->iclk);
 }
 
 /*
@@ -1725,7 +1725,7 @@
 	/* init software state */
 	udc = &controller;
 	udc->gadget.dev.parent = dev;
-	if (pdev->dev.of_node)
+	if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node)
 		at91udc_of_init(udc, pdev->dev.of_node);
 	else
 		memcpy(&udc->board, dev->platform_data,
@@ -1782,12 +1782,14 @@
 	}
 
 	/* don't do anything until we have both gadget driver and VBUS */
-	clk_enable(udc->iclk);
+	retval = clk_prepare_enable(udc->iclk);
+	if (retval)
+		goto fail1;
 	at91_udp_write(udc, AT91_UDP_TXVC, AT91_UDP_TXVC_TXVDIS);
 	at91_udp_write(udc, AT91_UDP_IDR, 0xffffffff);
 	/* Clear all pending interrupts - UDP may be used by bootloader. */
 	at91_udp_write(udc, AT91_UDP_ICR, 0xffffffff);
-	clk_disable(udc->iclk);
+	clk_disable_unprepare(udc->iclk);
 
 	/* request UDC and maybe VBUS irqs */
 	udc->udp_irq = platform_get_irq(pdev, 0);
diff --git a/drivers/usb/gadget/ether.c b/drivers/usb/gadget/ether.c
index f48712f..c1c113e 100644
--- a/drivers/usb/gadget/ether.c
+++ b/drivers/usb/gadget/ether.c
@@ -449,14 +449,20 @@
 
 static int __exit eth_unbind(struct usb_composite_dev *cdev)
 {
-	if (has_rndis())
+	if (has_rndis()) {
+		usb_put_function(f_rndis);
 		usb_put_function_instance(fi_rndis);
-	if (use_eem)
+	}
+	if (use_eem) {
+		usb_put_function(f_eem);
 		usb_put_function_instance(fi_eem);
-	else if (can_support_ecm(cdev->gadget))
+	} else if (can_support_ecm(cdev->gadget)) {
+		usb_put_function(f_ecm);
 		usb_put_function_instance(fi_ecm);
-	else
+	} else {
+		usb_put_function(f_geth);
 		usb_put_function_instance(fi_geth);
+	}
 	return 0;
 }
 
diff --git a/drivers/usb/gadget/f_ecm.c b/drivers/usb/gadget/f_ecm.c
index 5d3561e..edab45d 100644
--- a/drivers/usb/gadget/f_ecm.c
+++ b/drivers/usb/gadget/f_ecm.c
@@ -959,8 +959,11 @@
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = ecm_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_PTR(PTR_ERR(opts->net));
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "", &ecm_func_type);
 
diff --git a/drivers/usb/gadget/f_eem.c b/drivers/usb/gadget/f_eem.c
index 90ee802..d00392d 100644
--- a/drivers/usb/gadget/f_eem.c
+++ b/drivers/usb/gadget/f_eem.c
@@ -593,8 +593,11 @@
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = eem_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_CAST(opts->net);
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "", &eem_func_type);
 
diff --git a/drivers/usb/gadget/f_ncm.c b/drivers/usb/gadget/f_ncm.c
index 952177f..1c28fe1 100644
--- a/drivers/usb/gadget/f_ncm.c
+++ b/drivers/usb/gadget/f_ncm.c
@@ -1350,8 +1350,11 @@
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = ncm_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_PTR(PTR_ERR(opts->net));
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "", &ncm_func_type);
 
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 7944fb0..eb3aa81 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -488,7 +488,6 @@
 	struct usb_ep *ep;
 	int status, i;
 
-#ifndef USBF_PHONET_INCLUDED
 	struct f_phonet_opts *phonet_opts;
 
 	phonet_opts = container_of(f->fi, struct f_phonet_opts, func_inst);
@@ -507,7 +506,6 @@
 			return status;
 		phonet_opts->bound = true;
 	}
-#endif
 
 	/* Reserve interface IDs */
 	status = usb_interface_id(c, f);
@@ -656,8 +654,11 @@
 
 	opts->func_inst.free_func_inst = phonet_free_inst;
 	opts->net = gphonet_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_PTR(PTR_ERR(opts->net));
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "",
 			&phonet_func_type);
diff --git a/drivers/usb/gadget/f_rndis.c b/drivers/usb/gadget/f_rndis.c
index 191df35..717ed7f 100644
--- a/drivers/usb/gadget/f_rndis.c
+++ b/drivers/usb/gadget/f_rndis.c
@@ -963,8 +963,11 @@
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = rndis_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_CAST(opts->net);
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "",
 				    &rndis_func_type);
diff --git a/drivers/usb/gadget/f_subset.c b/drivers/usb/gadget/f_subset.c
index 5601e1d..7c8674f 100644
--- a/drivers/usb/gadget/f_subset.c
+++ b/drivers/usb/gadget/f_subset.c
@@ -505,8 +505,11 @@
 	mutex_init(&opts->lock);
 	opts->func_inst.free_func_inst = geth_free_inst;
 	opts->net = gether_setup_default();
-	if (IS_ERR(opts->net))
-		return ERR_CAST(opts->net);
+	if (IS_ERR(opts->net)) {
+		struct net_device *net = opts->net;
+		kfree(opts);
+		return ERR_CAST(net);
+	}
 
 	config_group_init_type_name(&opts->func_inst.group, "",
 				    &gether_func_type);
diff --git a/drivers/usb/gadget/fotg210-udc.c b/drivers/usb/gadget/fotg210-udc.c
index cce5535..10cd18d 100644
--- a/drivers/usb/gadget/fotg210-udc.c
+++ b/drivers/usb/gadget/fotg210-udc.c
@@ -1074,7 +1074,7 @@
 	.udc_stop		= fotg210_udc_stop,
 };
 
-static int __exit fotg210_udc_remove(struct platform_device *pdev)
+static int fotg210_udc_remove(struct platform_device *pdev)
 {
 	struct fotg210_udc *fotg210 = dev_get_drvdata(&pdev->dev);
 
@@ -1088,7 +1088,7 @@
 	return 0;
 }
 
-static int __init fotg210_udc_probe(struct platform_device *pdev)
+static int fotg210_udc_probe(struct platform_device *pdev)
 {
 	struct resource *res, *ires;
 	struct fotg210_udc *fotg210 = NULL;
diff --git a/drivers/usb/gadget/multi.c b/drivers/usb/gadget/multi.c
index 032b96a..2a1ebef 100644
--- a/drivers/usb/gadget/multi.c
+++ b/drivers/usb/gadget/multi.c
@@ -160,10 +160,8 @@
 		return ret;
 
 	f_acm_rndis = usb_get_function(fi_acm);
-	if (IS_ERR(f_acm_rndis)) {
-		ret = PTR_ERR(f_acm_rndis);
-		goto err_func_acm;
-	}
+	if (IS_ERR(f_acm_rndis))
+		return PTR_ERR(f_acm_rndis);
 
 	ret = usb_add_function(c, f_acm_rndis);
 	if (ret)
@@ -178,7 +176,6 @@
 	usb_remove_function(c, f_acm_rndis);
 err_conf:
 	usb_put_function(f_acm_rndis);
-err_func_acm:
 	return ret;
 }
 
@@ -226,7 +223,7 @@
 	/* implicit port_num is zero */
 	f_acm_multi = usb_get_function(fi_acm);
 	if (IS_ERR(f_acm_multi))
-		goto err_func_acm;
+		return PTR_ERR(f_acm_multi);
 
 	ret = usb_add_function(c, f_acm_multi);
 	if (ret)
@@ -241,7 +238,6 @@
 	usb_remove_function(c, f_acm_multi);
 err_conf:
 	usb_put_function(f_acm_multi);
-err_func_acm:
 	return ret;
 }
 
diff --git a/drivers/usb/gadget/mv_u3d_core.c b/drivers/usb/gadget/mv_u3d_core.c
index 07fdb3e..ec6a2d2 100644
--- a/drivers/usb/gadget/mv_u3d_core.c
+++ b/drivers/usb/gadget/mv_u3d_core.c
@@ -1776,7 +1776,7 @@
 	kfree(u3d->eps);
 
 	if (u3d->irq)
-		free_irq(u3d->irq, &dev->dev);
+		free_irq(u3d->irq, u3d);
 
 	if (u3d->cap_regs)
 		iounmap(u3d->cap_regs);
@@ -1974,7 +1974,7 @@
 	return 0;
 
 err_unregister:
-	free_irq(u3d->irq, &dev->dev);
+	free_irq(u3d->irq, u3d);
 err_request_irq:
 err_get_irq:
 	kfree(u3d->status_req);
diff --git a/drivers/usb/gadget/udc-core.c b/drivers/usb/gadget/udc-core.c
index ffd8fa5..13e25f8 100644
--- a/drivers/usb/gadget/udc-core.c
+++ b/drivers/usb/gadget/udc-core.c
@@ -50,6 +50,8 @@
 
 /* ------------------------------------------------------------------------- */
 
+#ifdef	CONFIG_HAS_DMA
+
 int usb_gadget_map_request(struct usb_gadget *gadget,
 		struct usb_request *req, int is_in)
 {
@@ -99,13 +101,15 @@
 }
 EXPORT_SYMBOL_GPL(usb_gadget_unmap_request);
 
+#endif	/* CONFIG_HAS_DMA */
+
 /* ------------------------------------------------------------------------- */
 
 void usb_gadget_set_state(struct usb_gadget *gadget,
 		enum usb_device_state state)
 {
 	gadget->state = state;
-	sysfs_notify(&gadget->dev.kobj, NULL, "status");
+	sysfs_notify(&gadget->dev.kobj, NULL, "state");
 }
 EXPORT_SYMBOL_GPL(usb_gadget_set_state);
 
@@ -194,9 +198,11 @@
 	dev_set_name(&gadget->dev, "gadget");
 	gadget->dev.parent = parent;
 
+#ifdef	CONFIG_HAS_DMA
 	dma_set_coherent_mask(&gadget->dev, parent->coherent_dma_mask);
 	gadget->dev.dma_parms = parent->dma_parms;
 	gadget->dev.dma_mask = parent->dma_mask;
+#endif
 
 	if (release)
 		gadget->dev.release = release;
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index 2b70277..6dce375 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -874,6 +874,7 @@
 				ehci->reset_done[wIndex] = jiffies
 						+ msecs_to_jiffies(20);
 				usb_hcd_start_port_resume(&hcd->self, wIndex);
+				set_bit(wIndex, &ehci->resuming_ports);
 				/* check the port again */
 				mod_timer(&ehci_to_hcd(ehci)->rh_timer,
 						ehci->reset_done[wIndex]);
diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index f80d033..8e3c878 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -1391,21 +1391,20 @@
 
 		/* Behind the scheduling threshold? */
 		if (unlikely(start < next)) {
+			unsigned now2 = (now - base) & (mod - 1);
 
 			/* USB_ISO_ASAP: Round up to the first available slot */
 			if (urb->transfer_flags & URB_ISO_ASAP)
 				start += (next - start + period - 1) & -period;
 
 			/*
-			 * Not ASAP: Use the next slot in the stream.  If
-			 * the entire URB falls before the threshold, fail.
+			 * Not ASAP: Use the next slot in the stream,
+			 * no matter what.
 			 */
-			else if (start + span - period < next) {
-				ehci_dbg(ehci, "iso urb late %p (%u+%u < %u)\n",
+			else if (start + span - period < now2) {
+				ehci_dbg(ehci, "iso underrun %p (%u+%u < %u)\n",
 						urb, start + base,
-						span - period, next + base);
-				status = -EXDEV;
-				goto fail;
+						span - period, now2 + base);
 			}
 		}
 
diff --git a/drivers/usb/host/ohci-pci.c b/drivers/usb/host/ohci-pci.c
index 08613e2..0f1d193 100644
--- a/drivers/usb/host/ohci-pci.c
+++ b/drivers/usb/host/ohci-pci.c
@@ -304,6 +304,11 @@
 	pr_info("%s: " DRIVER_DESC "\n", hcd_name);
 
 	ohci_init_driver(&ohci_pci_hc_driver, &pci_overrides);
+
+	/* Entries for the PCI suspend/resume callbacks are special */
+	ohci_pci_hc_driver.pci_suspend = ohci_suspend;
+	ohci_pci_hc_driver.pci_resume = ohci_resume;
+
 	return pci_register_driver(&ohci_pci_driver);
 }
 module_init(ohci_pci_init);
diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h
index 4b8a209..978c849 100644
--- a/drivers/usb/host/pci-quirks.h
+++ b/drivers/usb/host/pci-quirks.h
@@ -13,6 +13,7 @@
 void usb_disable_xhci_ports(struct pci_dev *xhci_pdev);
 void sb800_prefetch(struct device *dev, int on);
 #else
+struct pci_dev;
 static inline void usb_amd_quirk_pll_disable(void) {}
 static inline void usb_amd_quirk_pll_enable(void) {}
 static inline void usb_amd_dev_put(void) {}
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index df6978a..6f8c2fd 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -24,6 +24,7 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
 
 #include "xhci.h"
 
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index cc24e39..f00cb20 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -93,7 +93,6 @@
 	}
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
 			pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) {
-		xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
 		xhci->quirks |= XHCI_EP_LIMIT_QUIRK;
 		xhci->limit_active_eps = 64;
 		xhci->quirks |= XHCI_SW_BW_CHECKING;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 1e57eaf..5b08cd8 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -434,7 +434,7 @@
 
 	/* A ring has pending URBs if its TD list is not empty */
 	if (!(ep->ep_state & EP_HAS_STREAMS)) {
-		if (!(list_empty(&ep->ring->td_list)))
+		if (ep->ring && !(list_empty(&ep->ring->td_list)))
 			xhci_ring_ep_doorbell(xhci, slot_id, ep_index, 0);
 		return;
 	}
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 2c49f00..9478caa 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -27,6 +27,7 @@
 #include <linux/moduleparam.h>
 #include <linux/slab.h>
 #include <linux/dmi.h>
+#include <linux/dma-mapping.h>
 
 #include "xhci.h"
 
@@ -329,7 +330,7 @@
 	return;
 }
 
-static void xhci_msix_sync_irqs(struct xhci_hcd *xhci)
+static void __maybe_unused xhci_msix_sync_irqs(struct xhci_hcd *xhci)
 {
 	int i;
 
@@ -1181,9 +1182,6 @@
 	}
 
 	xhci = hcd_to_xhci(hcd);
-	if (xhci->xhc_state & XHCI_STATE_HALTED)
-		return -ENODEV;
-
 	if (check_virt_dev) {
 		if (!udev->slot_id || !xhci->devs[udev->slot_id]) {
 			printk(KERN_DEBUG "xHCI %s called with unaddressed "
@@ -1199,6 +1197,9 @@
 		}
 	}
 
+	if (xhci->xhc_state & XHCI_STATE_HALTED)
+		return -ENODEV;
+
 	return 1;
 }
 
@@ -3898,7 +3899,7 @@
  * Issue an Evaluate Context command to change the Maximum Exit Latency in the
  * slot context.  If that succeeds, store the new MEL in the xhci_virt_device.
  */
-static int xhci_change_max_exit_latency(struct xhci_hcd *xhci,
+static int __maybe_unused xhci_change_max_exit_latency(struct xhci_hcd *xhci,
 			struct usb_device *udev, u16 max_exit_latency)
 {
 	struct xhci_virt_device *virt_dev;
@@ -4892,6 +4893,13 @@
 
 	get_quirks(dev, xhci);
 
+	/* In xhci controllers which follow xhci 1.0 spec gives a spurious
+	 * success event after a short transfer. This quirk will ignore such
+	 * spurious event.
+	 */
+	if (xhci->hci_version > 0x96)
+		xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
+
 	/* Make sure the HC is halted. */
 	retval = xhci_halt(xhci);
 	if (retval)
diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c
index eb3c8c1..eeb2720 100644
--- a/drivers/usb/misc/adutux.c
+++ b/drivers/usb/misc/adutux.c
@@ -830,7 +830,7 @@
 
 	/* let the user know what node this device is now attached to */
 	dev_info(&interface->dev, "ADU%d %s now attached to /dev/usb/adutux%d\n",
-		 udev->descriptor.idProduct, dev->serial_number,
+		 le16_to_cpu(udev->descriptor.idProduct), dev->serial_number,
 		 (dev->minor - ADU_MINOR_BASE));
 exit:
 	dbg(2, " %s : leave, return value %p (dev)", __func__, dev);
diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index c21386e..de98906 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -3247,6 +3247,7 @@
 	{ USB_DEVICE(0x0711, 0x0903) },
 	{ USB_DEVICE(0x0711, 0x0918) },
 	{ USB_DEVICE(0x0711, 0x0920) },
+	{ USB_DEVICE(0x0711, 0x0950) },
 	{ USB_DEVICE(0x182d, 0x021c) },
 	{ USB_DEVICE(0x182d, 0x0269) },
 	{ }
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index 6708a3b..f44e8b5 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -481,7 +481,7 @@
 
 static int omap2430_probe(struct platform_device *pdev)
 {
-	struct resource			musb_resources[2];
+	struct resource			musb_resources[3];
 	struct musb_hdrc_platform_data	*pdata = pdev->dev.platform_data;
 	struct omap_musb_board_data	*data;
 	struct platform_device		*musb;
@@ -581,6 +581,11 @@
 	musb_resources[1].end = pdev->resource[1].end;
 	musb_resources[1].flags = pdev->resource[1].flags;
 
+	musb_resources[2].name = pdev->resource[2].name;
+	musb_resources[2].start = pdev->resource[2].start;
+	musb_resources[2].end = pdev->resource[2].end;
+	musb_resources[2].flags = pdev->resource[2].flags;
+
 	ret = platform_device_add_resources(musb, musb_resources,
 			ARRAY_SIZE(musb_resources));
 	if (ret) {
diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c
index 2c06a89..6f8a9ca 100644
--- a/drivers/usb/musb/tusb6010.c
+++ b/drivers/usb/musb/tusb6010.c
@@ -1156,7 +1156,7 @@
 
 static int tusb_probe(struct platform_device *pdev)
 {
-	struct resource musb_resources[2];
+	struct resource musb_resources[3];
 	struct musb_hdrc_platform_data	*pdata = pdev->dev.platform_data;
 	struct platform_device		*musb;
 	struct tusb6010_glue		*glue;
@@ -1199,6 +1199,11 @@
 	musb_resources[1].end = pdev->resource[1].end;
 	musb_resources[1].flags = pdev->resource[1].flags;
 
+	musb_resources[2].name = pdev->resource[2].name;
+	musb_resources[2].start = pdev->resource[2].start;
+	musb_resources[2].end = pdev->resource[2].end;
+	musb_resources[2].flags = pdev->resource[2].flags;
+
 	ret = platform_device_add_resources(musb, musb_resources,
 			ARRAY_SIZE(musb_resources));
 	if (ret) {
diff --git a/drivers/usb/phy/phy-fsl-usb.h b/drivers/usb/phy/phy-fsl-usb.h
index ca26628..e1859b8 100644
--- a/drivers/usb/phy/phy-fsl-usb.h
+++ b/drivers/usb/phy/phy-fsl-usb.h
@@ -15,7 +15,7 @@
  * 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include "otg_fsm.h"
+#include "phy-fsm-usb.h"
 #include <linux/usb/otg.h>
 #include <linux/ioctl.h>
 
diff --git a/drivers/usb/phy/phy-fsm-usb.c b/drivers/usb/phy/phy-fsm-usb.c
index c520b35..7f45966 100644
--- a/drivers/usb/phy/phy-fsm-usb.c
+++ b/drivers/usb/phy/phy-fsm-usb.c
@@ -29,7 +29,7 @@
 #include <linux/usb/gadget.h>
 #include <linux/usb/otg.h>
 
-#include "phy-otg-fsm.h"
+#include "phy-fsm-usb.h"
 
 /* Change USB protocol when there is a protocol change */
 static int otg_set_protocol(struct otg_fsm *fsm, int protocol)
diff --git a/drivers/usb/phy/phy-omap-usb3.c b/drivers/usb/phy/phy-omap-usb3.c
index efe6e14..a2fb30b 100644
--- a/drivers/usb/phy/phy-omap-usb3.c
+++ b/drivers/usb/phy/phy-omap-usb3.c
@@ -71,9 +71,9 @@
 	{1250, 5, 4, 20, 0},		/* 12 MHz */
 	{3125, 20, 4, 20, 0},		/* 16.8 MHz */
 	{1172, 8, 4, 20, 65537},	/* 19.2 MHz */
+	{1000, 7, 4, 10, 0},            /* 20 MHz */
 	{1250, 12, 4, 20, 0},		/* 26 MHz */
 	{3125, 47, 4, 20, 92843},	/* 38.4 MHz */
-	{1000, 7, 4, 10, 0},            /* 20 MHz */
 
 };
 
diff --git a/drivers/usb/phy/phy-samsung-usb2.c b/drivers/usb/phy/phy-samsung-usb2.c
index 1011c16..758b86d 100644
--- a/drivers/usb/phy/phy-samsung-usb2.c
+++ b/drivers/usb/phy/phy-samsung-usb2.c
@@ -388,7 +388,7 @@
 		clk = devm_clk_get(dev, "otg");
 
 	if (IS_ERR(clk)) {
-		dev_err(dev, "Failed to get otg clock\n");
+		dev_err(dev, "Failed to get usbhost/otg clock\n");
 		return PTR_ERR(clk);
 	}
 
diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c
index ed4949f..805940c 100644
--- a/drivers/usb/renesas_usbhs/mod_gadget.c
+++ b/drivers/usb/renesas_usbhs/mod_gadget.c
@@ -855,10 +855,6 @@
 	struct usbhsg_gpriv *gpriv = usbhsg_gadget_to_gpriv(gadget);
 	struct usbhs_priv *priv = usbhsg_gpriv_to_priv(gpriv);
 
-	if (!driver		||
-	    !driver->unbind)
-		return -EINVAL;
-
 	usbhsg_try_stop(priv, USBHSG_STATUS_REGISTERD);
 	gpriv->driver = NULL;
 
diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig
index 8c3a42ea..7eef9b3 100644
--- a/drivers/usb/serial/Kconfig
+++ b/drivers/usb/serial/Kconfig
@@ -719,6 +719,13 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called flashloader.
 
+config USB_SERIAL_SUUNTO
+	tristate "USB Suunto ANT+ driver"
+	help
+	  Say Y here if you want to use the Suunto ANT+ USB device.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called suunto.
 
 config USB_SERIAL_DEBUG
 	tristate "USB Debugging Device"
diff --git a/drivers/usb/serial/Makefile b/drivers/usb/serial/Makefile
index f7130114..a14a870 100644
--- a/drivers/usb/serial/Makefile
+++ b/drivers/usb/serial/Makefile
@@ -54,6 +54,7 @@
 obj-$(CONFIG_USB_SERIAL_SIERRAWIRELESS)		+= sierra.o
 obj-$(CONFIG_USB_SERIAL_SPCP8X5)		+= spcp8x5.o
 obj-$(CONFIG_USB_SERIAL_SSU100)			+= ssu100.o
+obj-$(CONFIG_USB_SERIAL_SUUNTO)			+= suunto.o
 obj-$(CONFIG_USB_SERIAL_SYMBOL)			+= symbolserial.o
 obj-$(CONFIG_USB_SERIAL_WWAN)			+= usb_wwan.o
 obj-$(CONFIG_USB_SERIAL_TI)			+= ti_usb_3410_5052.o
diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index d6ef2f8..0eae4ba 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -53,6 +53,7 @@
 	{ USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
 	{ USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
 	{ USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */
+	{ USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */
 	{ USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
 	{ USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
 	{ USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */
@@ -118,6 +119,8 @@
 	{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
 	{ USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */
 	{ USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */
+	{ USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
+	{ USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
 	{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
@@ -148,6 +151,7 @@
 	{ USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
 	{ USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
 	{ USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
+	{ USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
 	{ USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */
 	{ USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */
 	{ USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 7260ec6..b65e657 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -735,9 +735,34 @@
 	{ USB_DEVICE(FTDI_VID, FTDI_NDI_AURORA_SCU_PID),
 		.driver_info = (kernel_ulong_t)&ftdi_NDI_device_quirk },
 	{ USB_DEVICE(TELLDUS_VID, TELLDUS_TELLSTICK_PID) },
-	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_SERIAL_VX7_PID) },
-	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_CT29B_PID) },
-	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_RTS01_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S03_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_59_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57A_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_57B_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29A_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29B_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29F_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62B_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S01_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_29C_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_81B_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_82B_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5D_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K4Y_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_K5G_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_S05_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_60_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_61_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_62_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_63B_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_64_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_65_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_92D_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_W5R_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_A5R_PID) },
+	{ USB_DEVICE(RTSYSTEMS_VID, RTSYSTEMS_USB_PW1_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_MAXSTREAM_PID) },
 	{ USB_DEVICE(FTDI_VID, FTDI_PHI_FISCO_PID) },
 	{ USB_DEVICE(TML_VID, TML_USB_SERIAL_PID) },
diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
index 6dd7925..1b8af46 100644
--- a/drivers/usb/serial/ftdi_sio_ids.h
+++ b/drivers/usb/serial/ftdi_sio_ids.h
@@ -815,11 +815,35 @@
 /*
  * RT Systems programming cables for various ham radios
  */
-#define RTSYSTEMS_VID			0x2100	/* Vendor ID */
-#define RTSYSTEMS_SERIAL_VX7_PID	0x9e52	/* Serial converter for VX-7 Radios using FT232RL */
-#define RTSYSTEMS_CT29B_PID		0x9e54	/* CT29B Radio Cable */
-#define RTSYSTEMS_RTS01_PID		0x9e57	/* USB-RTS01 Radio Cable */
-
+#define RTSYSTEMS_VID		0x2100	/* Vendor ID */
+#define RTSYSTEMS_USB_S03_PID	0x9001	/* RTS-03 USB to Serial Adapter */
+#define RTSYSTEMS_USB_59_PID	0x9e50	/* USB-59 USB to 8 pin plug */
+#define RTSYSTEMS_USB_57A_PID	0x9e51	/* USB-57A USB to 4pin 3.5mm plug */
+#define RTSYSTEMS_USB_57B_PID	0x9e52	/* USB-57B USB to extended 4pin 3.5mm plug */
+#define RTSYSTEMS_USB_29A_PID	0x9e53	/* USB-29A USB to 3.5mm stereo plug */
+#define RTSYSTEMS_USB_29B_PID	0x9e54	/* USB-29B USB to 6 pin mini din */
+#define RTSYSTEMS_USB_29F_PID	0x9e55	/* USB-29F USB to 6 pin modular plug */
+#define RTSYSTEMS_USB_62B_PID	0x9e56	/* USB-62B USB to 8 pin mini din plug*/
+#define RTSYSTEMS_USB_S01_PID	0x9e57	/* USB-RTS01 USB to 3.5 mm stereo plug*/
+#define RTSYSTEMS_USB_63_PID	0x9e58	/* USB-63 USB to 9 pin female*/
+#define RTSYSTEMS_USB_29C_PID	0x9e59	/* USB-29C USB to 4 pin modular plug*/
+#define RTSYSTEMS_USB_81B_PID	0x9e5A	/* USB-81 USB to 8 pin mini din plug*/
+#define RTSYSTEMS_USB_82B_PID	0x9e5B	/* USB-82 USB to 2.5 mm stereo plug*/
+#define RTSYSTEMS_USB_K5D_PID	0x9e5C	/* USB-K5D USB to 8 pin modular plug*/
+#define RTSYSTEMS_USB_K4Y_PID	0x9e5D	/* USB-K4Y USB to 2.5/3.5 mm plugs*/
+#define RTSYSTEMS_USB_K5G_PID	0x9e5E	/* USB-K5G USB to 8 pin modular plug*/
+#define RTSYSTEMS_USB_S05_PID	0x9e5F	/* USB-RTS05 USB to 2.5 mm stereo plug*/
+#define RTSYSTEMS_USB_60_PID	0x9e60	/* USB-60 USB to 6 pin din*/
+#define RTSYSTEMS_USB_61_PID	0x9e61	/* USB-61 USB to 6 pin mini din*/
+#define RTSYSTEMS_USB_62_PID	0x9e62	/* USB-62 USB to 8 pin mini din*/
+#define RTSYSTEMS_USB_63B_PID	0x9e63	/* USB-63 USB to 9 pin female*/
+#define RTSYSTEMS_USB_64_PID	0x9e64	/* USB-64 USB to 9 pin male*/
+#define RTSYSTEMS_USB_65_PID	0x9e65	/* USB-65 USB to 9 pin female null modem*/
+#define RTSYSTEMS_USB_92_PID	0x9e66	/* USB-92 USB to 12 pin plug*/
+#define RTSYSTEMS_USB_92D_PID	0x9e67	/* USB-92D USB to 12 pin plug data*/
+#define RTSYSTEMS_USB_W5R_PID	0x9e68	/* USB-W5R USB to 8 pin modular plug*/
+#define RTSYSTEMS_USB_A5R_PID	0x9e69	/* USB-A5R USB to 8 pin modular plug*/
+#define RTSYSTEMS_USB_PW1_PID	0x9e6A	/* USB-PW1 USB to 8 pin modular plug*/
 
 /*
  * Physik Instrumente
diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c
index 5a97972..58c17fd 100644
--- a/drivers/usb/serial/keyspan.c
+++ b/drivers/usb/serial/keyspan.c
@@ -2303,7 +2303,7 @@
 	if (d_details == NULL) {
 		dev_err(&serial->dev->dev, "%s - unknown product id %x\n",
 		    __func__, le16_to_cpu(serial->dev->descriptor.idProduct));
-		return 1;
+		return -ENODEV;
 	}
 
 	/* Setup private data for serial driver */
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 51da424..b013001 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -90,6 +90,7 @@
 	struct list_head        urblist_entry;
 	struct kref             ref_count;
 	struct urb              *urb;
+	struct usb_ctrlrequest	*setup;
 };
 
 enum mos7715_pp_modes {
@@ -271,6 +272,7 @@
 	struct mos7715_parport *mos_parport = urbtrack->mos_parport;
 
 	usb_free_urb(urbtrack->urb);
+	kfree(urbtrack->setup);
 	kfree(urbtrack);
 	kref_put(&mos_parport->ref_count, destroy_mos_parport);
 }
@@ -355,7 +357,6 @@
 	struct urbtracker *urbtrack;
 	int ret_val;
 	unsigned long flags;
-	struct usb_ctrlrequest setup;
 	struct usb_serial *serial = mos_parport->serial;
 	struct usb_device *usbdev = serial->dev;
 
@@ -373,14 +374,20 @@
 		kfree(urbtrack);
 		return -ENOMEM;
 	}
-	setup.bRequestType = (__u8)0x40;
-	setup.bRequest = (__u8)0x0e;
-	setup.wValue = get_reg_value(reg, dummy);
-	setup.wIndex = get_reg_index(reg);
-	setup.wLength = 0;
+	urbtrack->setup = kmalloc(sizeof(*urbtrack->setup), GFP_KERNEL);
+	if (!urbtrack->setup) {
+		usb_free_urb(urbtrack->urb);
+		kfree(urbtrack);
+		return -ENOMEM;
+	}
+	urbtrack->setup->bRequestType = (__u8)0x40;
+	urbtrack->setup->bRequest = (__u8)0x0e;
+	urbtrack->setup->wValue = get_reg_value(reg, dummy);
+	urbtrack->setup->wIndex = get_reg_index(reg);
+	urbtrack->setup->wLength = 0;
 	usb_fill_control_urb(urbtrack->urb, usbdev,
 			     usb_sndctrlpipe(usbdev, 0),
-			     (unsigned char *)&setup,
+			     (unsigned char *)urbtrack->setup,
 			     NULL, 0, async_complete, urbtrack);
 	kref_init(&urbtrack->ref_count);
 	INIT_LIST_HEAD(&urbtrack->urblist_entry);
diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
index 0a818b2..3bac469 100644
--- a/drivers/usb/serial/mos7840.c
+++ b/drivers/usb/serial/mos7840.c
@@ -183,7 +183,10 @@
 #define LED_ON_MS	500
 #define LED_OFF_MS	500
 
-static int device_type;
+enum mos7840_flag {
+	MOS7840_FLAG_CTRL_BUSY,
+	MOS7840_FLAG_LED_BUSY,
+};
 
 static const struct usb_device_id id_table[] = {
 	{USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)},
@@ -238,9 +241,12 @@
 
 	/* For device(s) with LED indicator */
 	bool has_led;
-	bool led_flag;
 	struct timer_list led_timer1;	/* Timer for LED on */
 	struct timer_list led_timer2;	/* Timer for LED off */
+	struct urb *led_urb;
+	struct usb_ctrlrequest *led_dr;
+
+	unsigned long flags;
 };
 
 /*
@@ -460,10 +466,10 @@
 	case -ESHUTDOWN:
 		/* this urb is terminated, clean up */
 		dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status);
-		return;
+		goto out;
 	default:
 		dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status);
-		return;
+		goto out;
 	}
 
 	dev_dbg(dev, "%s urb buffer size is %d\n", __func__, urb->actual_length);
@@ -476,6 +482,8 @@
 		mos7840_handle_new_msr(mos7840_port, regval);
 	else if (mos7840_port->MsrLsr == 1)
 		mos7840_handle_new_lsr(mos7840_port, regval);
+out:
+	clear_bit_unlock(MOS7840_FLAG_CTRL_BUSY, &mos7840_port->flags);
 }
 
 static int mos7840_get_reg(struct moschip_port *mcs, __u16 Wval, __u16 reg,
@@ -486,6 +494,9 @@
 	unsigned char *buffer = mcs->ctrl_buf;
 	int ret;
 
+	if (test_and_set_bit_lock(MOS7840_FLAG_CTRL_BUSY, &mcs->flags))
+		return -EBUSY;
+
 	dr->bRequestType = MCS_RD_RTYPE;
 	dr->bRequest = MCS_RDREQ;
 	dr->wValue = cpu_to_le16(Wval);	/* 0 */
@@ -497,6 +508,9 @@
 			     mos7840_control_callback, mcs);
 	mcs->control_urb->transfer_buffer_length = 2;
 	ret = usb_submit_urb(mcs->control_urb, GFP_ATOMIC);
+	if (ret)
+		clear_bit_unlock(MOS7840_FLAG_CTRL_BUSY, &mcs->flags);
+
 	return ret;
 }
 
@@ -523,7 +537,7 @@
 				__u16 reg)
 {
 	struct usb_device *dev = mcs->port->serial->dev;
-	struct usb_ctrlrequest *dr = mcs->dr;
+	struct usb_ctrlrequest *dr = mcs->led_dr;
 
 	dr->bRequestType = MCS_WR_RTYPE;
 	dr->bRequest = MCS_WRREQ;
@@ -531,10 +545,10 @@
 	dr->wIndex = cpu_to_le16(reg);
 	dr->wLength = cpu_to_le16(0);
 
-	usb_fill_control_urb(mcs->control_urb, dev, usb_sndctrlpipe(dev, 0),
+	usb_fill_control_urb(mcs->led_urb, dev, usb_sndctrlpipe(dev, 0),
 		(unsigned char *)dr, NULL, 0, mos7840_set_led_callback, NULL);
 
-	usb_submit_urb(mcs->control_urb, GFP_ATOMIC);
+	usb_submit_urb(mcs->led_urb, GFP_ATOMIC);
 }
 
 static void mos7840_set_led_sync(struct usb_serial_port *port, __u16 reg,
@@ -560,7 +574,19 @@
 {
 	struct moschip_port *mcs = (struct moschip_port *) arg;
 
-	mcs->led_flag = false;
+	clear_bit_unlock(MOS7840_FLAG_LED_BUSY, &mcs->flags);
+}
+
+static void mos7840_led_activity(struct usb_serial_port *port)
+{
+	struct moschip_port *mos7840_port = usb_get_serial_port_data(port);
+
+	if (test_and_set_bit_lock(MOS7840_FLAG_LED_BUSY, &mos7840_port->flags))
+		return;
+
+	mos7840_set_led_async(mos7840_port, 0x0301, MODEM_CONTROL_REGISTER);
+	mod_timer(&mos7840_port->led_timer1,
+				jiffies + msecs_to_jiffies(LED_ON_MS));
 }
 
 /*****************************************************************************
@@ -758,14 +784,8 @@
 		return;
 	}
 
-	/* Turn on LED */
-	if (mos7840_port->has_led && !mos7840_port->led_flag) {
-		mos7840_port->led_flag = true;
-		mos7840_set_led_async(mos7840_port, 0x0301,
-					MODEM_CONTROL_REGISTER);
-		mod_timer(&mos7840_port->led_timer1,
-				jiffies + msecs_to_jiffies(LED_ON_MS));
-	}
+	if (mos7840_port->has_led)
+		mos7840_led_activity(port);
 
 	mos7840_port->read_urb_busy = true;
 	retval = usb_submit_urb(mos7840_port->read_urb, GFP_ATOMIC);
@@ -816,18 +836,6 @@
 /************************************************************************/
 /*       D R I V E R  T T Y  I N T E R F A C E  F U N C T I O N S       */
 /************************************************************************/
-#ifdef MCSSerialProbe
-static int mos7840_serial_probe(struct usb_serial *serial,
-				const struct usb_device_id *id)
-{
-
-	/*need to implement the mode_reg reading and updating\
-	   structures usb_serial_ device_type\
-	   (i.e num_ports, num_bulkin,bulkout etc) */
-	/* Also we can update the changes  attach */
-	return 1;
-}
-#endif
 
 /*****************************************************************************
  * mos7840_open
@@ -905,20 +913,20 @@
 	status = mos7840_get_reg_sync(port, mos7840_port->SpRegOffset, &Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "Reading Spreg failed\n");
-		return -1;
+		goto err;
 	}
 	Data |= 0x80;
 	status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "writing Spreg failed\n");
-		return -1;
+		goto err;
 	}
 
 	Data &= ~0x80;
 	status = mos7840_set_reg_sync(port, mos7840_port->SpRegOffset, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "writing Spreg failed\n");
-		return -1;
+		goto err;
 	}
 	/* End of block to be checked */
 
@@ -927,7 +935,7 @@
 									&Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "Reading Controlreg failed\n");
-		return -1;
+		goto err;
 	}
 	Data |= 0x08;		/* Driver done bit */
 	Data |= 0x20;		/* rx_disable */
@@ -935,7 +943,7 @@
 				mos7840_port->ControlRegOffset, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "writing Controlreg failed\n");
-		return -1;
+		goto err;
 	}
 	/* do register settings here */
 	/* Set all regs to the device default values. */
@@ -946,21 +954,21 @@
 	status = mos7840_set_uart_reg(port, INTERRUPT_ENABLE_REGISTER, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "disabling interrupts failed\n");
-		return -1;
+		goto err;
 	}
 	/* Set FIFO_CONTROL_REGISTER to the default value */
 	Data = 0x00;
 	status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER  failed\n");
-		return -1;
+		goto err;
 	}
 
 	Data = 0xcf;
 	status = mos7840_set_uart_reg(port, FIFO_CONTROL_REGISTER, Data);
 	if (status < 0) {
 		dev_dbg(&port->dev, "Writing FIFO_CONTROL_REGISTER  failed\n");
-		return -1;
+		goto err;
 	}
 
 	Data = 0x03;
@@ -1103,6 +1111,15 @@
 	/* mos7840_change_port_settings(mos7840_port,old_termios); */
 
 	return 0;
+err:
+	for (j = 0; j < NUM_URBS; ++j) {
+		urb = mos7840_port->write_urb_pool[j];
+		if (!urb)
+			continue;
+		kfree(urb->transfer_buffer);
+		usb_free_urb(urb);
+	}
+	return status;
 }
 
 /*****************************************************************************
@@ -1445,13 +1462,8 @@
 	data1 = urb->transfer_buffer;
 	dev_dbg(&port->dev, "bulkout endpoint is %d\n", port->bulk_out_endpointAddress);
 
-	/* Turn on LED */
-	if (mos7840_port->has_led && !mos7840_port->led_flag) {
-		mos7840_port->led_flag = true;
-		mos7840_set_led_sync(port, MODEM_CONTROL_REGISTER, 0x0301);
-		mod_timer(&mos7840_port->led_timer1,
-				jiffies + msecs_to_jiffies(LED_ON_MS));
-	}
+	if (mos7840_port->has_led)
+		mos7840_led_activity(port);
 
 	/* send it down the pipe */
 	status = usb_submit_urb(urb, GFP_ATOMIC);
@@ -2178,38 +2190,48 @@
 	return 0;
 }
 
-static int mos7840_calc_num_ports(struct usb_serial *serial)
+static int mos7840_probe(struct usb_serial *serial,
+				const struct usb_device_id *id)
 {
-	__u16 data = 0x00;
+	u16 product = le16_to_cpu(serial->dev->descriptor.idProduct);
 	u8 *buf;
-	int mos7840_num_ports;
+	int device_type;
+
+	if (product == MOSCHIP_DEVICE_ID_7810 ||
+		product == MOSCHIP_DEVICE_ID_7820) {
+		device_type = product;
+		goto out;
+	}
 
 	buf = kzalloc(VENDOR_READ_LENGTH, GFP_KERNEL);
-	if (buf) {
-		usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
+	if (!buf)
+		return -ENOMEM;
+
+	usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
 			MCS_RDREQ, MCS_RD_RTYPE, 0, GPIO_REGISTER, buf,
 			VENDOR_READ_LENGTH, MOS_WDR_TIMEOUT);
-		data = *buf;
-		kfree(buf);
-	}
 
-	if (serial->dev->descriptor.idProduct == MOSCHIP_DEVICE_ID_7810 ||
-		serial->dev->descriptor.idProduct == MOSCHIP_DEVICE_ID_7820) {
-		device_type = serial->dev->descriptor.idProduct;
-	} else {
-		/* For a MCS7840 device GPIO0 must be set to 1 */
-		if ((data & 0x01) == 1)
-			device_type = MOSCHIP_DEVICE_ID_7840;
-		else if (mos7810_check(serial))
-			device_type = MOSCHIP_DEVICE_ID_7810;
-		else
-			device_type = MOSCHIP_DEVICE_ID_7820;
-	}
+	/* For a MCS7840 device GPIO0 must be set to 1 */
+	if (buf[0] & 0x01)
+		device_type = MOSCHIP_DEVICE_ID_7840;
+	else if (mos7810_check(serial))
+		device_type = MOSCHIP_DEVICE_ID_7810;
+	else
+		device_type = MOSCHIP_DEVICE_ID_7820;
+
+	kfree(buf);
+out:
+	usb_set_serial_data(serial, (void *)(unsigned long)device_type);
+
+	return 0;
+}
+
+static int mos7840_calc_num_ports(struct usb_serial *serial)
+{
+	int device_type = (unsigned long)usb_get_serial_data(serial);
+	int mos7840_num_ports;
 
 	mos7840_num_ports = (device_type >> 4) & 0x000F;
-	serial->num_bulk_in = mos7840_num_ports;
-	serial->num_bulk_out = mos7840_num_ports;
-	serial->num_ports = mos7840_num_ports;
 
 	return mos7840_num_ports;
 }
@@ -2217,6 +2239,7 @@
 static int mos7840_port_probe(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
+	int device_type = (unsigned long)usb_get_serial_data(serial);
 	struct moschip_port *mos7840_port;
 	int status;
 	int pnum;
@@ -2392,6 +2415,14 @@
 	if (device_type == MOSCHIP_DEVICE_ID_7810) {
 		mos7840_port->has_led = true;
 
+		mos7840_port->led_urb = usb_alloc_urb(0, GFP_KERNEL);
+		mos7840_port->led_dr = kmalloc(sizeof(*mos7840_port->led_dr),
+								GFP_KERNEL);
+		if (!mos7840_port->led_urb || !mos7840_port->led_dr) {
+			status = -ENOMEM;
+			goto error;
+		}
+
 		init_timer(&mos7840_port->led_timer1);
 		mos7840_port->led_timer1.function = mos7840_led_off;
 		mos7840_port->led_timer1.expires =
@@ -2404,8 +2435,6 @@
 			jiffies + msecs_to_jiffies(LED_OFF_MS);
 		mos7840_port->led_timer2.data = (unsigned long)mos7840_port;
 
-		mos7840_port->led_flag = false;
-
 		/* Turn off LED */
 		mos7840_set_led_sync(port, MODEM_CONTROL_REGISTER, 0x0300);
 	}
@@ -2427,6 +2456,8 @@
 	}
 	return 0;
 error:
+	kfree(mos7840_port->led_dr);
+	usb_free_urb(mos7840_port->led_urb);
 	kfree(mos7840_port->dr);
 	kfree(mos7840_port->ctrl_buf);
 	usb_free_urb(mos7840_port->control_urb);
@@ -2447,6 +2478,10 @@
 
 		del_timer_sync(&mos7840_port->led_timer1);
 		del_timer_sync(&mos7840_port->led_timer2);
+
+		usb_kill_urb(mos7840_port->led_urb);
+		usb_free_urb(mos7840_port->led_urb);
+		kfree(mos7840_port->led_dr);
 	}
 	usb_kill_urb(mos7840_port->control_urb);
 	usb_free_urb(mos7840_port->control_urb);
@@ -2473,9 +2508,7 @@
 	.throttle = mos7840_throttle,
 	.unthrottle = mos7840_unthrottle,
 	.calc_num_ports = mos7840_calc_num_ports,
-#ifdef MCSSerialProbe
-	.probe = mos7840_serial_probe,
-#endif
+	.probe = mos7840_probe,
 	.ioctl = mos7840_ioctl,
 	.set_termios = mos7840_set_termios,
 	.break_ctl = mos7840_break,
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 5dd857d..1cf6f12 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -341,17 +341,12 @@
 #define OLIVETTI_VENDOR_ID			0x0b3c
 #define OLIVETTI_PRODUCT_OLICARD100		0xc000
 #define OLIVETTI_PRODUCT_OLICARD145		0xc003
+#define OLIVETTI_PRODUCT_OLICARD200		0xc005
 
 /* Celot products */
 #define CELOT_VENDOR_ID				0x211f
 #define CELOT_PRODUCT_CT680M			0x6801
 
-/* ONDA Communication vendor id */
-#define ONDA_VENDOR_ID       0x1ee8
-
-/* ONDA MT825UP HSDPA 14.2 modem */
-#define ONDA_MT825UP         0x000b
-
 /* Samsung products */
 #define SAMSUNG_VENDOR_ID                       0x04e8
 #define SAMSUNG_PRODUCT_GT_B3730                0x6889
@@ -444,7 +439,8 @@
 
 /* Hyundai Petatel Inc. products */
 #define PETATEL_VENDOR_ID			0x1ff4
-#define PETATEL_PRODUCT_NP10T			0x600e
+#define PETATEL_PRODUCT_NP10T_600A		0x600a
+#define PETATEL_PRODUCT_NP10T_600E		0x600e
 
 /* TP-LINK Incorporated products */
 #define TPLINK_VENDOR_ID			0x2357
@@ -782,6 +778,7 @@
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) },
 	{ USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) },
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */
+	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */
 	{ USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6280) }, /* BP3-USB & BP3-EXT HSDPA */
 	{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6008) },
@@ -817,7 +814,8 @@
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0017, 0xff, 0xff, 0xff),
 		.driver_info = (kernel_ulong_t)&net_intf3_blacklist },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0018, 0xff, 0xff, 0xff) },
-	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff),
+		.driver_info = (kernel_ulong_t)&net_intf3_blacklist },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0020, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0021, 0xff, 0xff, 0xff),
 		.driver_info = (kernel_ulong_t)&net_intf4_blacklist },
@@ -1256,8 +1254,8 @@
 
 	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) },
 	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) },
+	{ USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) },
 	{ USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */
-	{ USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */
 	{ USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/
 	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) },
 	{ USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) },
@@ -1329,9 +1327,12 @@
 	{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) },
 	{ USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) },
-	{ USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) },
+	{ USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) },
+	{ USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) },
 	{ USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180),
 	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
+	{ USB_DEVICE(TPLINK_VENDOR_ID, 0x9000),					/* TP-Link MA260 */
+	  .driver_info = (kernel_ulong_t)&net_intf4_blacklist },
 	{ USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x02, 0x01) },	/* D-Link DWM-156 (variant) */
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x00, 0x00) },	/* D-Link DWM-156 (variant) */
@@ -1339,6 +1340,8 @@
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */
 	{ } /* Terminating entry */
 };
 MODULE_DEVICE_TABLE(usb, option_ids);
diff --git a/drivers/usb/serial/suunto.c b/drivers/usb/serial/suunto.c
new file mode 100644
index 0000000..2248e7a
--- /dev/null
+++ b/drivers/usb/serial/suunto.c
@@ -0,0 +1,41 @@
+/*
+ * Suunto ANT+ USB Driver
+ *
+ * Copyright (C) 2013 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ * Copyright (C) 2013 Linux Foundation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation only.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/tty.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+#include <linux/usb/serial.h>
+#include <linux/uaccess.h>
+
+static const struct usb_device_id id_table[] = {
+	{ USB_DEVICE(0x0fcf, 0x1008) },
+	{ },
+};
+MODULE_DEVICE_TABLE(usb, id_table);
+
+static struct usb_serial_driver suunto_device = {
+	.driver = {
+		.owner =	THIS_MODULE,
+		.name =		KBUILD_MODNAME,
+	},
+	.id_table =		id_table,
+	.num_ports =		1,
+};
+
+static struct usb_serial_driver * const serial_drivers[] = {
+	&suunto_device,
+	NULL,
+};
+
+module_usb_serial_driver(serial_drivers, id_table);
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 7182bb7..5c9f9b1 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -371,7 +371,7 @@
 	usb_set_serial_data(serial, tdev);
 
 	/* determine device type */
-	if (usb_match_id(serial->interface, ti_id_table_3410))
+	if (serial->type == &ti_1port_device)
 		tdev->td_is_3410 = 1;
 	dev_dbg(&dev->dev, "%s - device type is %s\n", __func__,
 		tdev->td_is_3410 ? "3410" : "5052");
@@ -1536,14 +1536,15 @@
 	char buf[32];
 
 	/* try ID specific firmware first, then try generic firmware */
-	sprintf(buf, "ti_usb-v%04x-p%04x.fw", dev->descriptor.idVendor,
-	    dev->descriptor.idProduct);
+	sprintf(buf, "ti_usb-v%04x-p%04x.fw",
+			le16_to_cpu(dev->descriptor.idVendor),
+			le16_to_cpu(dev->descriptor.idProduct));
 	status = request_firmware(&fw_p, buf, &dev->dev);
 
 	if (status != 0) {
 		buf[0] = '\0';
-		if (dev->descriptor.idVendor == MTS_VENDOR_ID) {
-			switch (dev->descriptor.idProduct) {
+		if (le16_to_cpu(dev->descriptor.idVendor) == MTS_VENDOR_ID) {
+			switch (le16_to_cpu(dev->descriptor.idProduct)) {
 			case MTS_CDMA_PRODUCT_ID:
 				strcpy(buf, "mts_cdma.fw");
 				break;
diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c
index 8257d30..8536578 100644
--- a/drivers/usb/serial/usb_wwan.c
+++ b/drivers/usb/serial/usb_wwan.c
@@ -291,18 +291,18 @@
 			tty_flip_buffer_push(&port->port);
 		} else
 			dev_dbg(dev, "%s: empty read urb received\n", __func__);
-
-		/* Resubmit urb so we continue receiving */
-		err = usb_submit_urb(urb, GFP_ATOMIC);
-		if (err) {
-			if (err != -EPERM) {
-				dev_err(dev, "%s: resubmit read urb failed. (%d)\n", __func__, err);
-				/* busy also in error unless we are killed */
-				usb_mark_last_busy(port->serial->dev);
-			}
-		} else {
+	}
+	/* Resubmit urb so we continue receiving */
+	err = usb_submit_urb(urb, GFP_ATOMIC);
+	if (err) {
+		if (err != -EPERM) {
+			dev_err(dev, "%s: resubmit read urb failed. (%d)\n",
+				__func__, err);
+			/* busy also in error unless we are killed */
 			usb_mark_last_busy(port->serial->dev);
 		}
+	} else {
+		usb_mark_last_busy(port->serial->dev);
 	}
 }
 
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 179933528..c015f2c 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -665,6 +665,13 @@
 		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
 		US_FL_FIX_INQUIRY ),
 
+/* Submitted by Ren Bigcren <bigcren.ren@sonymobile.com> */
+UNUSUAL_DEV(  0x054c, 0x02a5, 0x0100, 0x0100,
+		"Sony Corp.",
+		"MicroVault Flash Drive",
+		USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+		US_FL_NO_READ_CAPACITY_16 ),
+
 /* floppy reports multiple luns */
 UNUSUAL_DEV(  0x055d, 0x2020, 0x0000, 0x0210,
 		"SAMSUNG",
diff --git a/drivers/usb/wusbcore/wa-xfer.c b/drivers/usb/wusbcore/wa-xfer.c
index 16968c8..d3493ca 100644
--- a/drivers/usb/wusbcore/wa-xfer.c
+++ b/drivers/usb/wusbcore/wa-xfer.c
@@ -1226,6 +1226,12 @@
 	}
 	spin_lock_irqsave(&xfer->lock, flags);
 	rpipe = xfer->ep->hcpriv;
+	if (rpipe == NULL) {
+		pr_debug("%s: xfer id 0x%08X has no RPIPE.  %s",
+			__func__, wa_xfer_id(xfer),
+			"Probably already aborted.\n" );
+		goto out_unlock;
+	}
 	/* Check the delayed list -> if there, release and complete */
 	spin_lock_irqsave(&wa->xfer_list_lock, flags2);
 	if (!list_empty(&xfer->list_node) && xfer->seg == NULL)
@@ -1644,8 +1650,7 @@
 			break;
 		}
 		usb_status = xfer_result->bTransferStatus & 0x3f;
-		if (usb_status == WA_XFER_STATUS_ABORTED
-		    || usb_status == WA_XFER_STATUS_NOT_FOUND)
+		if (usb_status == WA_XFER_STATUS_NOT_FOUND)
 			/* taken care of already */
 			break;
 		xfer_id = xfer_result->dwTransferID;
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index c5179e2..cef6002 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -137,8 +137,27 @@
 	 */
 	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
 
-	if (vdev->reset_works)
-		__pci_reset_function(pdev);
+	/*
+	 * Careful, device_lock may already be held.  This is the case if
+	 * a driver unbind is blocked.  Try to get the locks ourselves to
+	 * prevent a deadlock.
+	 */
+	if (vdev->reset_works) {
+		bool reset_done = false;
+
+		if (pci_cfg_access_trylock(pdev)) {
+			if (device_trylock(&pdev->dev)) {
+				__pci_reset_function_locked(pdev);
+				reset_done = true;
+				device_unlock(&pdev->dev);
+			}
+			pci_cfg_access_unlock(pdev);
+		}
+
+		if (!reset_done)
+			pr_warn("%s: Unable to acquire locks for reset of %s\n",
+				__func__, dev_name(&pdev->dev));
+	}
 
 	pci_restore_state(pdev);
 }
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index c488da5..842f450 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -494,27 +494,6 @@
 	return 0;
 }
 
-static int vfio_group_nb_del_dev(struct vfio_group *group, struct device *dev)
-{
-	struct vfio_device *device;
-
-	/*
-	 * Expect to fall out here.  If a device was in use, it would
-	 * have been bound to a vfio sub-driver, which would have blocked
-	 * in .remove at vfio_del_group_dev.  Sanity check that we no
-	 * longer track the device, so it's safe to remove.
-	 */
-	device = vfio_group_get_device(group, dev);
-	if (likely(!device))
-		return 0;
-
-	WARN("Device %s removed from live group %d!\n", dev_name(dev),
-	     iommu_group_id(group->iommu_group));
-
-	vfio_device_put(device);
-	return 0;
-}
-
 static int vfio_group_nb_verify(struct vfio_group *group, struct device *dev)
 {
 	/* We don't care what happens when the group isn't in use */
@@ -531,13 +510,11 @@
 	struct device *dev = data;
 
 	/*
-	 * Need to go through a group_lock lookup to get a reference or
-	 * we risk racing a group being removed.  Leave a WARN_ON for
-	 * debuging, but if the group no longer exists, a spurious notify
-	 * is harmless.
+	 * Need to go through a group_lock lookup to get a reference or we
+	 * risk racing a group being removed.  Ignore spurious notifies.
 	 */
 	group = vfio_group_try_get(group);
-	if (WARN_ON(!group))
+	if (!group)
 		return NOTIFY_OK;
 
 	switch (action) {
@@ -545,7 +522,13 @@
 		vfio_group_nb_add_dev(group, dev);
 		break;
 	case IOMMU_GROUP_NOTIFY_DEL_DEVICE:
-		vfio_group_nb_del_dev(group, dev);
+		/*
+		 * Nothing to do here.  If the device is in use, then the
+		 * vfio sub-driver should block the remove callback until
+		 * it is unused.  If the device is unused or attached to a
+		 * stub driver, then it should be released and we don't
+		 * care that it will be going away.
+		 */
 		break;
 	case IOMMU_GROUP_NOTIFY_BIND_DRIVER:
 		pr_debug("%s: Device %s, group %d binding to driver\n",
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 027be91..969a859 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -15,7 +15,6 @@
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
-#include <linux/rcupdate.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 
@@ -346,12 +345,11 @@
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
 	bool zcopy, zcopy_used;
 
-	/* TODO: check that we are running from vhost_worker? */
-	sock = rcu_dereference_check(vq->private_data, 1);
-	if (!sock)
-		return;
-
 	mutex_lock(&vq->mutex);
+	sock = vq->private_data;
+	if (!sock)
+		goto out;
+
 	vhost_disable_notify(&net->dev, vq);
 
 	hdr_size = nvq->vhost_hlen;
@@ -461,7 +459,7 @@
 			break;
 		}
 	}
-
+out:
 	mutex_unlock(&vq->mutex);
 }
 
@@ -570,14 +568,14 @@
 	s16 headcount;
 	size_t vhost_hlen, sock_hlen;
 	size_t vhost_len, sock_len;
-	/* TODO: check that we are running from vhost_worker? */
-	struct socket *sock = rcu_dereference_check(vq->private_data, 1);
-
-	if (!sock)
-		return;
+	struct socket *sock;
 
 	mutex_lock(&vq->mutex);
+	sock = vq->private_data;
+	if (!sock)
+		goto out;
 	vhost_disable_notify(&net->dev, vq);
+
 	vhost_hlen = nvq->vhost_hlen;
 	sock_hlen = nvq->sock_hlen;
 
@@ -652,7 +650,7 @@
 			break;
 		}
 	}
-
+out:
 	mutex_unlock(&vq->mutex);
 }
 
@@ -750,8 +748,7 @@
 	struct vhost_poll *poll = n->poll + (nvq - n->vqs);
 	struct socket *sock;
 
-	sock = rcu_dereference_protected(vq->private_data,
-					 lockdep_is_held(&vq->mutex));
+	sock = vq->private_data;
 	if (!sock)
 		return 0;
 
@@ -764,10 +761,9 @@
 	struct socket *sock;
 
 	mutex_lock(&vq->mutex);
-	sock = rcu_dereference_protected(vq->private_data,
-					 lockdep_is_held(&vq->mutex));
+	sock = vq->private_data;
 	vhost_net_disable_vq(n, vq);
-	rcu_assign_pointer(vq->private_data, NULL);
+	vq->private_data = NULL;
 	mutex_unlock(&vq->mutex);
 	return sock;
 }
@@ -923,8 +919,7 @@
 	}
 
 	/* start polling new socket */
-	oldsock = rcu_dereference_protected(vq->private_data,
-					    lockdep_is_held(&vq->mutex));
+	oldsock = vq->private_data;
 	if (sock != oldsock) {
 		ubufs = vhost_net_ubuf_alloc(vq,
 					     sock && vhost_sock_zcopy(sock));
@@ -934,7 +929,7 @@
 		}
 
 		vhost_net_disable_vq(n, vq);
-		rcu_assign_pointer(vq->private_data, sock);
+		vq->private_data = sock;
 		r = vhost_init_used(vq);
 		if (r)
 			goto err_used;
@@ -968,7 +963,7 @@
 	return 0;
 
 err_used:
-	rcu_assign_pointer(vq->private_data, oldsock);
+	vq->private_data = oldsock;
 	vhost_net_enable_vq(n, vq);
 	if (ubufs)
 		vhost_net_ubuf_put_wait_and_free(ubufs);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 06adf31..0c27c7d 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -902,19 +902,15 @@
 	int head, ret;
 	u8 target;
 
+	mutex_lock(&vq->mutex);
 	/*
 	 * We can handle the vq only after the endpoint is setup by calling the
 	 * VHOST_SCSI_SET_ENDPOINT ioctl.
-	 *
-	 * TODO: Check that we are running from vhost_worker which acts
-	 * as read-side critical section for vhost kind of RCU.
-	 * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
 	 */
-	vs_tpg = rcu_dereference_check(vq->private_data, 1);
+	vs_tpg = vq->private_data;
 	if (!vs_tpg)
-		return;
+		goto out;
 
-	mutex_lock(&vq->mutex);
 	vhost_disable_notify(&vs->dev, vq);
 
 	for (;;) {
@@ -1064,6 +1060,7 @@
 	vhost_scsi_free_cmd(cmd);
 err_cmd:
 	vhost_scsi_send_bad_target(vs, vq, head, out);
+out:
 	mutex_unlock(&vq->mutex);
 }
 
@@ -1232,9 +1229,8 @@
 		       sizeof(vs->vs_vhost_wwpn));
 		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
 			vq = &vs->vqs[i].vq;
-			/* Flushing the vhost_work acts as synchronize_rcu */
 			mutex_lock(&vq->mutex);
-			rcu_assign_pointer(vq->private_data, vs_tpg);
+			vq->private_data = vs_tpg;
 			vhost_init_used(vq);
 			mutex_unlock(&vq->mutex);
 		}
@@ -1313,9 +1309,8 @@
 	if (match) {
 		for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
 			vq = &vs->vqs[i].vq;
-			/* Flushing the vhost_work acts as synchronize_rcu */
 			mutex_lock(&vq->mutex);
-			rcu_assign_pointer(vq->private_data, NULL);
+			vq->private_data = NULL;
 			mutex_unlock(&vq->mutex);
 		}
 	}
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index a73ea21..339eae8 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -13,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
-#include <linux/rcupdate.h>
 #include <linux/file.h>
 #include <linux/slab.h>
 
@@ -200,9 +199,8 @@
 		priv = test ? n : NULL;
 
 		/* start polling new socket */
-		oldpriv = rcu_dereference_protected(vq->private_data,
-						    lockdep_is_held(&vq->mutex));
-		rcu_assign_pointer(vq->private_data, priv);
+		oldpriv = vq->private_data;
+		vq->private_data = priv;
 
 		r = vhost_init_used(&n->vqs[index]);
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 42298cd..4465ed5 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -103,14 +103,8 @@
 	struct iovec iov[UIO_MAXIOV];
 	struct iovec *indirect;
 	struct vring_used_elem *heads;
-	/* We use a kind of RCU to access private pointer.
-	 * All readers access it from worker, which makes it possible to
-	 * flush the vhost_work instead of synchronize_rcu. Therefore readers do
-	 * not need to call rcu_read_lock/rcu_read_unlock: the beginning of
-	 * vhost_work execution acts instead of rcu_read_lock() and the end of
-	 * vhost_work execution acts instead of rcu_read_unlock().
-	 * Writers use virtqueue mutex. */
-	void __rcu *private_data;
+	/* Protected by virtqueue mutex. */
+	void *private_data;
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index a89c15d..9b0f12c 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -435,8 +435,8 @@
 	const char *name;
 	int i;
 
-	for (i = ARRAY_SIZE(aty_chips); i > 0; i--)
-		if (par->pci_id == aty_chips[i - 1].pci_id)
+	for (i = (int)ARRAY_SIZE(aty_chips) - 1; i >= 0; i--)
+		if (par->pci_id == aty_chips[i].pci_id)
 			break;
 
 	if (i < 0)
diff --git a/drivers/video/backlight/max8925_bl.c b/drivers/video/backlight/max8925_bl.c
index 5ca11b0..886e797 100644
--- a/drivers/video/backlight/max8925_bl.c
+++ b/drivers/video/backlight/max8925_bl.c
@@ -101,33 +101,37 @@
 	.get_brightness	= max8925_backlight_get_brightness,
 };
 
-#ifdef CONFIG_OF
-static int max8925_backlight_dt_init(struct platform_device *pdev,
-			      struct max8925_backlight_pdata *pdata)
+static void max8925_backlight_dt_init(struct platform_device *pdev)
 {
 	struct device_node *nproot = pdev->dev.parent->of_node, *np;
-	int dual_string;
+	struct max8925_backlight_pdata *pdata;
+	u32 val;
 
-	if (!nproot)
-		return -ENODEV;
+	if (!nproot || !IS_ENABLED(CONFIG_OF))
+		return;
+
+	pdata = devm_kzalloc(&pdev->dev,
+			     sizeof(struct max8925_backlight_pdata),
+			     GFP_KERNEL);
+	if (!pdata)
+		return;
+
 	np = of_find_node_by_name(nproot, "backlight");
 	if (!np) {
 		dev_err(&pdev->dev, "failed to find backlight node\n");
-		return -ENODEV;
+		return;
 	}
 
-	of_property_read_u32(np, "maxim,max8925-dual-string", &dual_string);
-	pdata->dual_string = dual_string;
-	return 0;
+	if (!of_property_read_u32(np, "maxim,max8925-dual-string", &val))
+		pdata->dual_string = val;
+
+	pdev->dev.platform_data = pdata;
 }
-#else
-#define max8925_backlight_dt_init(x, y)	(-1)
-#endif
 
 static int max8925_backlight_probe(struct platform_device *pdev)
 {
 	struct max8925_chip *chip = dev_get_drvdata(pdev->dev.parent);
-	struct max8925_backlight_pdata *pdata = pdev->dev.platform_data;
+	struct max8925_backlight_pdata *pdata;
 	struct max8925_backlight_data *data;
 	struct backlight_device *bl;
 	struct backlight_properties props;
@@ -170,13 +174,10 @@
 	platform_set_drvdata(pdev, bl);
 
 	value = 0;
-	if (pdev->dev.parent->of_node && !pdata) {
-		pdata = devm_kzalloc(&pdev->dev,
-				     sizeof(struct max8925_backlight_pdata),
-				     GFP_KERNEL);
-		max8925_backlight_dt_init(pdev, pdata);
-	}
+	if (!pdev->dev.platform_data)
+		max8925_backlight_dt_init(pdev);
 
+	pdata = pdev->dev.platform_data;
 	if (pdata) {
 		if (pdata->lxw_scl)
 			value |= (1 << 7);
diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c
index 4017833..9e758a8 100644
--- a/drivers/video/hdmi.c
+++ b/drivers/video/hdmi.c
@@ -22,6 +22,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/bug.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/hdmi.h>
@@ -52,7 +53,7 @@
 
 	frame->type = HDMI_INFOFRAME_TYPE_AVI;
 	frame->version = 2;
-	frame->length = 13;
+	frame->length = HDMI_AVI_INFOFRAME_SIZE;
 
 	return 0;
 }
@@ -83,7 +84,7 @@
 	if (size < length)
 		return -ENOSPC;
 
-	memset(buffer, 0, length);
+	memset(buffer, 0, size);
 
 	ptr[0] = frame->type;
 	ptr[1] = frame->version;
@@ -95,13 +96,18 @@
 
 	ptr[0] = ((frame->colorspace & 0x3) << 5) | (frame->scan_mode & 0x3);
 
-	if (frame->active_info_valid)
+	/*
+	 * Data byte 1, bit 4 has to be set if we provide the active format
+	 * aspect ratio
+	 */
+	if (frame->active_aspect & 0xf)
 		ptr[0] |= BIT(4);
 
-	if (frame->horizontal_bar_valid)
+	/* Bit 3 and 2 indicate if we transmit horizontal/vertical bar data */
+	if (frame->top_bar || frame->bottom_bar)
 		ptr[0] |= BIT(3);
 
-	if (frame->vertical_bar_valid)
+	if (frame->left_bar || frame->right_bar)
 		ptr[0] |= BIT(2);
 
 	ptr[1] = ((frame->colorimetry & 0x3) << 6) |
@@ -151,7 +157,7 @@
 
 	frame->type = HDMI_INFOFRAME_TYPE_SPD;
 	frame->version = 1;
-	frame->length = 25;
+	frame->length = HDMI_SPD_INFOFRAME_SIZE;
 
 	strncpy(frame->vendor, vendor, sizeof(frame->vendor));
 	strncpy(frame->product, product, sizeof(frame->product));
@@ -185,7 +191,7 @@
 	if (size < length)
 		return -ENOSPC;
 
-	memset(buffer, 0, length);
+	memset(buffer, 0, size);
 
 	ptr[0] = frame->type;
 	ptr[1] = frame->version;
@@ -218,7 +224,7 @@
 
 	frame->type = HDMI_INFOFRAME_TYPE_AUDIO;
 	frame->version = 1;
-	frame->length = 10;
+	frame->length = HDMI_AUDIO_INFOFRAME_SIZE;
 
 	return 0;
 }
@@ -250,7 +256,7 @@
 	if (size < length)
 		return -ENOSPC;
 
-	memset(buffer, 0, length);
+	memset(buffer, 0, size);
 
 	if (frame->channels >= 2)
 		channels = frame->channels - 1;
@@ -282,9 +288,33 @@
 EXPORT_SYMBOL(hdmi_audio_infoframe_pack);
 
 /**
- * hdmi_vendor_infoframe_pack() - write a HDMI vendor infoframe to binary
- *                                buffer
+ * hdmi_vendor_infoframe_init() - initialize an HDMI vendor infoframe
  * @frame: HDMI vendor infoframe
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int hdmi_vendor_infoframe_init(struct hdmi_vendor_infoframe *frame)
+{
+	memset(frame, 0, sizeof(*frame));
+
+	frame->type = HDMI_INFOFRAME_TYPE_VENDOR;
+	frame->version = 1;
+
+	frame->oui = HDMI_IEEE_OUI;
+
+	/*
+	 * 0 is a valid value for s3d_struct, so we use a special "not set"
+	 * value
+	 */
+	frame->s3d_struct = HDMI_3D_STRUCTURE_INVALID;
+
+	return 0;
+}
+EXPORT_SYMBOL(hdmi_vendor_infoframe_init);
+
+/**
+ * hdmi_vendor_infoframe_pack() - write a HDMI vendor infoframe to binary buffer
+ * @frame: HDMI infoframe
  * @buffer: destination buffer
  * @size: size of buffer
  *
@@ -297,27 +327,110 @@
  * error code on failure.
  */
 ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame,
-				   void *buffer, size_t size)
+				 void *buffer, size_t size)
 {
 	u8 *ptr = buffer;
 	size_t length;
 
+	/* empty info frame */
+	if (frame->vic == 0 && frame->s3d_struct == HDMI_3D_STRUCTURE_INVALID)
+		return -EINVAL;
+
+	/* only one of those can be supplied */
+	if (frame->vic != 0 && frame->s3d_struct != HDMI_3D_STRUCTURE_INVALID)
+		return -EINVAL;
+
+	/* for side by side (half) we also need to provide 3D_Ext_Data */
+	if (frame->s3d_struct >= HDMI_3D_STRUCTURE_SIDE_BY_SIDE_HALF)
+		frame->length = 6;
+	else
+		frame->length = 5;
+
 	length = HDMI_INFOFRAME_HEADER_SIZE + frame->length;
 
 	if (size < length)
 		return -ENOSPC;
 
-	memset(buffer, 0, length);
+	memset(buffer, 0, size);
 
 	ptr[0] = frame->type;
 	ptr[1] = frame->version;
 	ptr[2] = frame->length;
 	ptr[3] = 0; /* checksum */
 
-	memcpy(&ptr[HDMI_INFOFRAME_HEADER_SIZE], frame->data, frame->length);
+	/* HDMI OUI */
+	ptr[4] = 0x03;
+	ptr[5] = 0x0c;
+	ptr[6] = 0x00;
+
+	if (frame->vic) {
+		ptr[7] = 0x1 << 5;	/* video format */
+		ptr[8] = frame->vic;
+	} else {
+		ptr[7] = 0x2 << 5;	/* video format */
+		ptr[8] = (frame->s3d_struct & 0xf) << 4;
+		if (frame->s3d_struct >= HDMI_3D_STRUCTURE_SIDE_BY_SIDE_HALF)
+			ptr[9] = (frame->s3d_ext_data & 0xf) << 4;
+	}
 
 	hdmi_infoframe_checksum(buffer, length);
 
 	return length;
 }
 EXPORT_SYMBOL(hdmi_vendor_infoframe_pack);
+
+/*
+ * hdmi_vendor_any_infoframe_pack() - write a vendor infoframe to binary buffer
+ */
+static ssize_t
+hdmi_vendor_any_infoframe_pack(union hdmi_vendor_any_infoframe *frame,
+			   void *buffer, size_t size)
+{
+	/* we only know about HDMI vendor infoframes */
+	if (frame->any.oui != HDMI_IEEE_OUI)
+		return -EINVAL;
+
+	return hdmi_vendor_infoframe_pack(&frame->hdmi, buffer, size);
+}
+
+/**
+ * hdmi_infoframe_pack() - write a HDMI infoframe to binary buffer
+ * @frame: HDMI infoframe
+ * @buffer: destination buffer
+ * @size: size of buffer
+ *
+ * Packs the information contained in the @frame structure into a binary
+ * representation that can be written into the corresponding controller
+ * registers. Also computes the checksum as required by section 5.3.5 of
+ * the HDMI 1.4 specification.
+ *
+ * Returns the number of bytes packed into the binary buffer or a negative
+ * error code on failure.
+ */
+ssize_t
+hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size)
+{
+	ssize_t length;
+
+	switch (frame->any.type) {
+	case HDMI_INFOFRAME_TYPE_AVI:
+		length = hdmi_avi_infoframe_pack(&frame->avi, buffer, size);
+		break;
+	case HDMI_INFOFRAME_TYPE_SPD:
+		length = hdmi_spd_infoframe_pack(&frame->spd, buffer, size);
+		break;
+	case HDMI_INFOFRAME_TYPE_AUDIO:
+		length = hdmi_audio_infoframe_pack(&frame->audio, buffer, size);
+		break;
+	case HDMI_INFOFRAME_TYPE_VENDOR:
+		length = hdmi_vendor_any_infoframe_pack(&frame->vendor,
+							buffer, size);
+		break;
+	default:
+		WARN(1, "Bad infoframe type %d\n", frame->any.type);
+		length = -EINVAL;
+	}
+
+	return length;
+}
+EXPORT_SYMBOL(hdmi_infoframe_pack);
diff --git a/drivers/video/mxsfb.c b/drivers/video/mxsfb.c
index 3ba3771..dc09ebe 100644
--- a/drivers/video/mxsfb.c
+++ b/drivers/video/mxsfb.c
@@ -239,24 +239,6 @@
 	}
 };
 
-static const struct fb_bitfield def_rgb666[] = {
-	[RED] = {
-		.offset = 16,
-		.length = 6,
-	},
-	[GREEN] = {
-		.offset = 8,
-		.length = 6,
-	},
-	[BLUE] = {
-		.offset = 0,
-		.length = 6,
-	},
-	[TRANSP] = {	/* no support for transparency */
-		.length = 0,
-	}
-};
-
 static const struct fb_bitfield def_rgb888[] = {
 	[RED] = {
 		.offset = 16,
@@ -309,9 +291,6 @@
 			break;
 		case STMLCDIF_16BIT:
 		case STMLCDIF_18BIT:
-			/* 24 bit to 18 bit mapping */
-			rgb = def_rgb666;
-			break;
 		case STMLCDIF_24BIT:
 			/* real 24 bit */
 			rgb = def_rgb888;
@@ -453,11 +432,6 @@
 			return -EINVAL;
 		case STMLCDIF_16BIT:
 		case STMLCDIF_18BIT:
-			/* 24 bit to 18 bit mapping */
-			ctrl |= CTRL_DF24; /* ignore the upper 2 bits in
-					    *  each colour component
-					    */
-			break;
 		case STMLCDIF_24BIT:
 			/* real 24 bit */
 			break;
diff --git a/drivers/video/nuc900fb.c b/drivers/video/nuc900fb.c
index 8c527e5..796e511 100644
--- a/drivers/video/nuc900fb.c
+++ b/drivers/video/nuc900fb.c
@@ -587,8 +587,7 @@
 	fbinfo->flags			= FBINFO_FLAG_DEFAULT;
 	fbinfo->pseudo_palette		= &fbi->pseudo_pal;
 
-	ret = request_irq(irq, nuc900fb_irqhandler, 0,
-			  pdev->name, fbinfo);
+	ret = request_irq(irq, nuc900fb_irqhandler, 0, pdev->name, fbi);
 	if (ret) {
 		dev_err(&pdev->dev, "cannot register irq handler %d -err %d\n",
 			irq, ret);
diff --git a/drivers/video/omap2/displays-new/connector-analog-tv.c b/drivers/video/omap2/displays-new/connector-analog-tv.c
index 5338f36..1b60698 100644
--- a/drivers/video/omap2/displays-new/connector-analog-tv.c
+++ b/drivers/video/omap2/displays-new/connector-analog-tv.c
@@ -28,6 +28,20 @@
 	bool invert_polarity;
 };
 
+static const struct omap_video_timings tvc_pal_timings = {
+	.x_res		= 720,
+	.y_res		= 574,
+	.pixel_clock	= 13500,
+	.hsw		= 64,
+	.hfp		= 12,
+	.hbp		= 68,
+	.vsw		= 5,
+	.vfp		= 5,
+	.vbp		= 41,
+
+	.interlace	= true,
+};
+
 #define to_panel_data(x) container_of(x, struct panel_drv_data, dssdev)
 
 static int tvc_connect(struct omap_dss_device *dssdev)
@@ -212,14 +226,14 @@
 		return -ENODEV;
 	}
 
-	ddata->timings = omap_dss_pal_timings;
+	ddata->timings = tvc_pal_timings;
 
 	dssdev = &ddata->dssdev;
 	dssdev->driver = &tvc_driver;
 	dssdev->dev = &pdev->dev;
 	dssdev->type = OMAP_DISPLAY_TYPE_VENC;
 	dssdev->owner = THIS_MODULE;
-	dssdev->panel.timings = omap_dss_pal_timings;
+	dssdev->panel.timings = tvc_pal_timings;
 
 	r = omapdss_register_display(dssdev);
 	if (r) {
diff --git a/drivers/video/sgivwfb.c b/drivers/video/sgivwfb.c
index b2a8912..a9ac3ce 100644
--- a/drivers/video/sgivwfb.c
+++ b/drivers/video/sgivwfb.c
@@ -713,7 +713,7 @@
 	r = vm_iomap_memory(vma, sgivwfb_mem_phys, sgivwfb_mem_size);
 
 	printk(KERN_DEBUG "sgivwfb: mmap framebuffer P(%lx)->V(%lx)\n",
-	       offset, vma->vm_start);
+		sgivwfb_mem_phys + (vma->vm_pgoff << PAGE_SHIFT), vma->vm_start);
 
 	return r;
 }
diff --git a/drivers/video/sh7760fb.c b/drivers/video/sh7760fb.c
index a8c6c43..1265b25 100644
--- a/drivers/video/sh7760fb.c
+++ b/drivers/video/sh7760fb.c
@@ -567,7 +567,7 @@
 	fb_dealloc_cmap(&info->cmap);
 	sh7760fb_free_mem(info);
 	if (par->irq >= 0)
-		free_irq(par->irq, par);
+		free_irq(par->irq, &par->vsync);
 	iounmap(par->base);
 	release_mem_region(par->ioarea->start, resource_size(par->ioarea));
 	framebuffer_release(info);
diff --git a/drivers/video/vga16fb.c b/drivers/video/vga16fb.c
index 830ded4..2827333 100644
--- a/drivers/video/vga16fb.c
+++ b/drivers/video/vga16fb.c
@@ -1265,7 +1265,6 @@
 
 static void vga16fb_destroy(struct fb_info *info)
 {
-	struct platform_device *dev = container_of(info->device, struct platform_device, dev);
 	iounmap(info->screen_base);
 	fb_dealloc_cmap(&info->cmap);
 	/* XXX unshare VGA regions */
diff --git a/drivers/video/xilinxfb.c b/drivers/video/xilinxfb.c
index f3d4a69..6629b29 100644
--- a/drivers/video/xilinxfb.c
+++ b/drivers/video/xilinxfb.c
@@ -341,8 +341,8 @@
 
 	if (drvdata->flags & BUS_ACCESS_FLAG) {
 		/* Put a banner in the log (for DEBUG) */
-		dev_dbg(dev, "regs: phys=%x, virt=%p\n", drvdata->regs_phys,
-					drvdata->regs);
+		dev_dbg(dev, "regs: phys=%pa, virt=%p\n",
+			&drvdata->regs_phys, drvdata->regs);
 	}
 	/* Put a banner in the log (for DEBUG) */
 	dev_dbg(dev, "fb: phys=%llx, virt=%p, size=%x\n",
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 9e02d60..23eae5c 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -145,7 +145,7 @@
 
 config XEN_TMEM
 	tristate
-	depends on !ARM
+	depends on !ARM && !ARM64
 	default m if (CLEANCACHE || FRONTSWAP)
 	help
 	  Shim to interface in-kernel Transcendent Memory hooks
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index eabd0ee..14fe79d 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -1,9 +1,8 @@
-ifneq ($(CONFIG_ARM),y)
-obj-y	+= manage.o
+ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
 obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
 endif
 obj-$(CONFIG_X86)			+= fallback.o
-obj-y	+= grant-table.o features.o events.o balloon.o
+obj-y	+= grant-table.o features.o events.o balloon.o manage.o
 obj-y	+= xenbus/
 
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index a58ac43..5e8be46 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -348,7 +348,7 @@
 
 	for_each_possible_cpu(i)
 		memset(per_cpu(cpu_evtchn_mask, i),
-		       (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
+		       (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
 }
 
 static inline void clear_evtchn(int port)
@@ -1493,8 +1493,10 @@
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
 static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 {
+	struct shared_info *s = HYPERVISOR_shared_info;
 	struct evtchn_bind_vcpu bind_vcpu;
 	int evtchn = evtchn_from_irq(irq);
+	int masked;
 
 	if (!VALID_EVTCHN(evtchn))
 		return -1;
@@ -1511,6 +1513,12 @@
 	bind_vcpu.vcpu = tcpu;
 
 	/*
+	 * Mask the event while changing the VCPU binding to prevent
+	 * it being delivered on an unexpected VCPU.
+	 */
+	masked = sync_test_and_set_bit(evtchn, BM(s->evtchn_mask));
+
+	/*
 	 * If this fails, it usually just indicates that we're dealing with a
 	 * virq or IPI channel, which don't actually need to be rebound. Ignore
 	 * it, but don't do the xenlinux-level rebind in that case.
@@ -1518,6 +1526,9 @@
 	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
 		bind_evtchn_to_cpu(evtchn, tcpu);
 
+	if (!masked)
+		unmask_evtchn(evtchn);
+
 	return 0;
 }
 
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 8feecf0..b6165e0 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -379,18 +379,12 @@
 		if (unbind.port >= NR_EVENT_CHANNELS)
 			break;
 
-		spin_lock_irq(&port_user_lock);
-
 		rc = -ENOTCONN;
-		if (get_port_user(unbind.port) != u) {
-			spin_unlock_irq(&port_user_lock);
+		if (get_port_user(unbind.port) != u)
 			break;
-		}
 
 		disable_irq(irq_from_evtchn(unbind.port));
 
-		spin_unlock_irq(&port_user_lock);
-
 		evtchn_unbind_from_user(u, unbind.port);
 
 		rc = 0;
@@ -490,26 +484,15 @@
 	int i;
 	struct per_user_data *u = filp->private_data;
 
-	spin_lock_irq(&port_user_lock);
-
-	free_page((unsigned long)u->ring);
-
 	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
 		if (get_port_user(i) != u)
 			continue;
 
 		disable_irq(irq_from_evtchn(i));
-	}
-
-	spin_unlock_irq(&port_user_lock);
-
-	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
-		if (get_port_user(i) != u)
-			continue;
-
 		evtchn_unbind_from_user(get_port_user(i), i);
 	}
 
+	free_page((unsigned long)u->ring);
 	kfree(u->name);
 	kfree(u);
 
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index 6ed8a9d..34b20bf 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -115,7 +115,6 @@
 			return -EFAULT;
 		}
 
-		INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume);
 		queue_work(xenbus_frontend_wq, &xdev->work);
 
 		return 0;
@@ -124,6 +123,16 @@
 	return xenbus_dev_resume(dev);
 }
 
+static int xenbus_frontend_dev_probe(struct device *dev)
+{
+	if (xen_store_domain_type == XS_LOCAL) {
+		struct xenbus_device *xdev = to_xenbus_device(dev);
+		INIT_WORK(&xdev->work, xenbus_frontend_delayed_resume);
+	}
+
+	return xenbus_dev_probe(dev);
+}
+
 static const struct dev_pm_ops xenbus_pm_ops = {
 	.suspend	= xenbus_dev_suspend,
 	.resume		= xenbus_frontend_dev_resume,
@@ -142,7 +151,7 @@
 		.name		= "xen",
 		.match		= xenbus_match,
 		.uevent		= xenbus_uevent_frontend,
-		.probe		= xenbus_dev_probe,
+		.probe		= xenbus_frontend_dev_probe,
 		.remove		= xenbus_dev_remove,
 		.shutdown	= xenbus_dev_shutdown,
 		.dev_attrs	= xenbus_dev_attrs,
@@ -474,7 +483,11 @@
 
 	register_xenstore_notifier(&xenstore_notifier);
 
-	xenbus_frontend_wq = create_workqueue("xenbus_frontend");
+	if (xen_store_domain_type == XS_LOCAL) {
+		xenbus_frontend_wq = create_workqueue("xenbus_frontend");
+		if (!xenbus_frontend_wq)
+			pr_warn("create xenbus frontend workqueue failed, S3 resume is likely to fail\n");
+	}
 
 	return 0;
 }
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 5e376bb..8defc6b 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -40,7 +40,7 @@
 	int block, off;
 
 	inode = iget_locked(sb, ino);
-	if (IS_ERR(inode))
+	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	if (!(inode->i_state & I_NEW))
 		return inode;
diff --git a/fs/bio.c b/fs/bio.c
index 94bbc04..c5eae72 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1045,12 +1045,22 @@
 int bio_uncopy_user(struct bio *bio)
 {
 	struct bio_map_data *bmd = bio->bi_private;
-	int ret = 0;
+	struct bio_vec *bvec;
+	int ret = 0, i;
 
-	if (!bio_flagged(bio, BIO_NULL_MAPPED))
-		ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
-				     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
-				     0, bmd->is_our_pages);
+	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
+		/*
+		 * if we're in a workqueue, the request is orphaned, so
+		 * don't copy into a random user address space, just free.
+		 */
+		if (current->mm)
+			ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+					     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
+					     0, bmd->is_our_pages);
+		else if (bmd->is_our_pages)
+			bio_for_each_segment_all(bvec, bio, i)
+				__free_page(bvec->bv_page);
+	}
 	bio_free_map_data(bmd);
 	bio_put(bio);
 	return ret;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index eaf1333..8bc5e8c 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -36,16 +36,23 @@
 				u64 extent_item_pos,
 				struct extent_inode_elem **eie)
 {
-	u64 data_offset;
-	u64 data_len;
+	u64 offset = 0;
 	struct extent_inode_elem *e;
 
-	data_offset = btrfs_file_extent_offset(eb, fi);
-	data_len = btrfs_file_extent_num_bytes(eb, fi);
+	if (!btrfs_file_extent_compression(eb, fi) &&
+	    !btrfs_file_extent_encryption(eb, fi) &&
+	    !btrfs_file_extent_other_encoding(eb, fi)) {
+		u64 data_offset;
+		u64 data_len;
 
-	if (extent_item_pos < data_offset ||
-	    extent_item_pos >= data_offset + data_len)
-		return 1;
+		data_offset = btrfs_file_extent_offset(eb, fi);
+		data_len = btrfs_file_extent_num_bytes(eb, fi);
+
+		if (extent_item_pos < data_offset ||
+		    extent_item_pos >= data_offset + data_len)
+			return 1;
+		offset = extent_item_pos - data_offset;
+	}
 
 	e = kmalloc(sizeof(*e), GFP_NOFS);
 	if (!e)
@@ -53,7 +60,7 @@
 
 	e->next = *eie;
 	e->inum = key->objectid;
-	e->offset = key->offset + (extent_item_pos - data_offset);
+	e->offset = key->offset + offset;
 	*eie = e;
 
 	return 0;
@@ -189,7 +196,7 @@
 	struct extent_buffer *eb;
 	struct btrfs_key key;
 	struct btrfs_file_extent_item *fi;
-	struct extent_inode_elem *eie = NULL;
+	struct extent_inode_elem *eie = NULL, *old = NULL;
 	u64 disk_byte;
 
 	if (level != 0) {
@@ -223,6 +230,7 @@
 
 		if (disk_byte == wanted_disk_byte) {
 			eie = NULL;
+			old = NULL;
 			if (extent_item_pos) {
 				ret = check_extent_in_eb(&key, eb, fi,
 						*extent_item_pos,
@@ -230,18 +238,20 @@
 				if (ret < 0)
 					break;
 			}
-			if (!ret) {
-				ret = ulist_add(parents, eb->start,
-						(uintptr_t)eie, GFP_NOFS);
-				if (ret < 0)
-					break;
-				if (!extent_item_pos) {
-					ret = btrfs_next_old_leaf(root, path,
-							time_seq);
-					continue;
-				}
+			if (ret > 0)
+				goto next;
+			ret = ulist_add_merge(parents, eb->start,
+					      (uintptr_t)eie,
+					      (u64 *)&old, GFP_NOFS);
+			if (ret < 0)
+				break;
+			if (!ret && extent_item_pos) {
+				while (old->next)
+					old = old->next;
+				old->next = eie;
 			}
 		}
+next:
 		ret = btrfs_next_old_item(root, path, time_seq);
 	}
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5bf4c39..ed50460 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1271,7 +1271,6 @@
 		BUG_ON(!eb_rewin);
 	}
 
-	extent_buffer_get(eb_rewin);
 	btrfs_tree_read_unlock(eb);
 	free_extent_buffer(eb);
 
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 583d98b..fe443fe 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4048,7 +4048,7 @@
 	}
 
 	while (!end) {
-		u64 offset_in_extent;
+		u64 offset_in_extent = 0;
 
 		/* break if the extent we found is outside the range */
 		if (em->start >= max || extent_map_end(em) < off)
@@ -4064,9 +4064,12 @@
 
 		/*
 		 * record the offset from the start of the extent
-		 * for adjusting the disk offset below
+		 * for adjusting the disk offset below.  Only do this if the
+		 * extent isn't compressed since our in ram offset may be past
+		 * what we have actually allocated on disk.
 		 */
-		offset_in_extent = em_start - em->start;
+		if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+			offset_in_extent = em_start - em->start;
 		em_end = extent_map_end(em);
 		em_len = em_end - em_start;
 		emflags = em->flags;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a005fe2..8e686a4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -596,20 +596,29 @@
 		if (no_splits)
 			goto next;
 
-		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
-		    em->start < start) {
+		if (em->start < start) {
 			split->start = em->start;
 			split->len = start - em->start;
-			split->orig_start = em->orig_start;
-			split->block_start = em->block_start;
 
-			if (compressed)
-				split->block_len = em->block_len;
-			else
-				split->block_len = split->len;
-			split->ram_bytes = em->ram_bytes;
-			split->orig_block_len = max(split->block_len,
-						    em->orig_block_len);
+			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+				split->orig_start = em->orig_start;
+				split->block_start = em->block_start;
+
+				if (compressed)
+					split->block_len = em->block_len;
+				else
+					split->block_len = split->len;
+				split->orig_block_len = max(split->block_len,
+						em->orig_block_len);
+				split->ram_bytes = em->ram_bytes;
+			} else {
+				split->orig_start = split->start;
+				split->block_len = 0;
+				split->block_start = em->block_start;
+				split->orig_block_len = 0;
+				split->ram_bytes = split->len;
+			}
+
 			split->generation = gen;
 			split->bdev = em->bdev;
 			split->flags = flags;
@@ -620,8 +629,7 @@
 			split = split2;
 			split2 = NULL;
 		}
-		if (em->block_start < EXTENT_MAP_LAST_BYTE &&
-		    testend && em->start + em->len > start + len) {
+		if (testend && em->start + em->len > start + len) {
 			u64 diff = start + len - em->start;
 
 			split->start = start + len;
@@ -630,18 +638,28 @@
 			split->flags = flags;
 			split->compress_type = em->compress_type;
 			split->generation = gen;
-			split->orig_block_len = max(em->block_len,
-						    em->orig_block_len);
-			split->ram_bytes = em->ram_bytes;
 
-			if (compressed) {
-				split->block_len = em->block_len;
-				split->block_start = em->block_start;
-				split->orig_start = em->orig_start;
+			if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+				split->orig_block_len = max(em->block_len,
+						    em->orig_block_len);
+
+				split->ram_bytes = em->ram_bytes;
+				if (compressed) {
+					split->block_len = em->block_len;
+					split->block_start = em->block_start;
+					split->orig_start = em->orig_start;
+				} else {
+					split->block_len = split->len;
+					split->block_start = em->block_start
+						+ diff;
+					split->orig_start = em->orig_start;
+				}
 			} else {
-				split->block_len = split->len;
-				split->block_start = em->block_start + diff;
-				split->orig_start = em->orig_start;
+				split->ram_bytes = split->len;
+				split->orig_start = split->start;
+				split->block_len = 0;
+				split->block_start = em->block_start;
+				split->orig_block_len = 0;
 			}
 
 			ret = add_extent_mapping(em_tree, split, modified);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6d1b93c..021694c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2166,16 +2166,23 @@
 		if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
 			continue;
 
-		extent_offset = btrfs_file_extent_offset(leaf, extent);
-		if (key.offset - extent_offset != offset)
+		/*
+		 * 'offset' refers to the exact key.offset,
+		 * NOT the 'offset' field in btrfs_extent_data_ref, ie.
+		 * (key.offset - extent_offset).
+		 */
+		if (key.offset != offset)
 			continue;
 
+		extent_offset = btrfs_file_extent_offset(leaf, extent);
 		num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
+
 		if (extent_offset >= old->extent_offset + old->offset +
 		    old->len || extent_offset + num_bytes <=
 		    old->extent_offset + old->offset)
 			continue;
 
+		ret = 0;
 		break;
 	}
 
@@ -2187,7 +2194,7 @@
 
 	backref->root_id = root_id;
 	backref->inum = inum;
-	backref->file_pos = offset + extent_offset;
+	backref->file_pos = offset;
 	backref->num_bytes = num_bytes;
 	backref->extent_offset = extent_offset;
 	backref->generation = btrfs_file_extent_generation(leaf, extent);
@@ -2210,7 +2217,8 @@
 	new->path = path;
 
 	list_for_each_entry_safe(old, tmp, &new->head, list) {
-		ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+		ret = iterate_inodes_from_logical(old->bytenr +
+						  old->extent_offset, fs_info,
 						  path, record_one_backref,
 						  old);
 		BUG_ON(ret < 0 && ret != -ENOENT);
@@ -4391,9 +4399,6 @@
 	int mask = attr->ia_valid;
 	int ret;
 
-	if (newsize == oldsize)
-		return 0;
-
 	/*
 	 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
 	 * special case where we need to update the times despite not having
@@ -5165,14 +5170,31 @@
 	}
 
 	/* Reached end of directory/root. Bump pos past the last item. */
-	if (key_type == BTRFS_DIR_INDEX_KEY)
-		/*
-		 * 32-bit glibc will use getdents64, but then strtol -
-		 * so the last number we can serve is this.
-		 */
-		ctx->pos = 0x7fffffff;
-	else
-		ctx->pos++;
+	ctx->pos++;
+
+	/*
+	 * Stop new entries from being returned after we return the last
+	 * entry.
+	 *
+	 * New directory entries are assigned a strictly increasing
+	 * offset.  This means that new entries created during readdir
+	 * are *guaranteed* to be seen in the future by that readdir.
+	 * This has broken buggy programs which operate on names as
+	 * they're returned by readdir.  Until we re-use freed offsets
+	 * we have this hack to stop new entries from being returned
+	 * under the assumption that they'll never reach this huge
+	 * offset.
+	 *
+	 * This is being careful not to overflow 32bit loff_t unless the
+	 * last entry requires it because doing so has broken 32bit apps
+	 * in the past.
+	 */
+	if (key_type == BTRFS_DIR_INDEX_KEY) {
+		if (ctx->pos >= INT_MAX)
+			ctx->pos = LLONG_MAX;
+		else
+			ctx->pos = INT_MAX;
+	}
 nopos:
 	ret = 0;
 err:
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d58cce7..af1931a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -983,12 +983,12 @@
  * a dirty root struct and adds it into the list of dead roots that need to
  * be deleted
  */
-int btrfs_add_dead_root(struct btrfs_root *root)
+void btrfs_add_dead_root(struct btrfs_root *root)
 {
 	spin_lock(&root->fs_info->trans_lock);
-	list_add_tail(&root->root_list, &root->fs_info->dead_roots);
+	if (list_empty(&root->root_list))
+		list_add_tail(&root->root_list, &root->fs_info->dead_roots);
 	spin_unlock(&root->fs_info->trans_lock);
-	return 0;
 }
 
 /*
@@ -1925,7 +1925,7 @@
 	}
 	root = list_first_entry(&fs_info->dead_roots,
 			struct btrfs_root, root_list);
-	list_del(&root->root_list);
+	list_del_init(&root->root_list);
 	spin_unlock(&fs_info->trans_lock);
 
 	pr_debug("btrfs: cleaner removing %llu\n",
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 005b037..defbc42 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -143,7 +143,7 @@
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root);
 
-int btrfs_add_dead_root(struct btrfs_root *root);
+void btrfs_add_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root);
 int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2c67914..ff60d89 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3746,8 +3746,9 @@
 	}
 
 log_extents:
+	btrfs_release_path(path);
+	btrfs_release_path(dst_path);
 	if (fast_search) {
-		btrfs_release_path(dst_path);
 		ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
 		if (ret) {
 			err = ret;
@@ -3764,8 +3765,6 @@
 	}
 
 	if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
-		btrfs_release_path(path);
-		btrfs_release_path(dst_path);
 		ret = log_directory_changes(trans, root, inode, path, dst_path);
 		if (ret) {
 			err = ret;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 45e57cc..fc6f4f3 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,17 +43,18 @@
 	server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
 	if (IS_ERR(server->secmech.md5)) {
 		cifs_dbg(VFS, "could not allocate crypto md5\n");
-		return PTR_ERR(server->secmech.md5);
+		rc = PTR_ERR(server->secmech.md5);
+		server->secmech.md5 = NULL;
+		return rc;
 	}
 
 	size = sizeof(struct shash_desc) +
 			crypto_shash_descsize(server->secmech.md5);
 	server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
 	if (!server->secmech.sdescmd5) {
-		rc = -ENOMEM;
 		crypto_free_shash(server->secmech.md5);
 		server->secmech.md5 = NULL;
-		return rc;
+		return -ENOMEM;
 	}
 	server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
 	server->secmech.sdescmd5->shash.flags = 0x0;
@@ -421,7 +422,7 @@
 		if (blobptr + attrsize > blobend)
 			break;
 		if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
-			if (!attrsize)
+			if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN)
 				break;
 			if (!ses->domainName) {
 				ses->domainName =
@@ -591,6 +592,7 @@
 
 static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server)
 {
+	int rc;
 	unsigned int size;
 
 	/* check if already allocated */
@@ -600,7 +602,9 @@
 	server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
 	if (IS_ERR(server->secmech.hmacmd5)) {
 		cifs_dbg(VFS, "could not allocate crypto hmacmd5\n");
-		return PTR_ERR(server->secmech.hmacmd5);
+		rc = PTR_ERR(server->secmech.hmacmd5);
+		server->secmech.hmacmd5 = NULL;
+		return rc;
 	}
 
 	size = sizeof(struct shash_desc) +
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4bdd547..85ea98d 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -147,18 +147,17 @@
 		goto out_no_root;
 	}
 
+	if (cifs_sb_master_tcon(cifs_sb)->nocase)
+		sb->s_d_op = &cifs_ci_dentry_ops;
+	else
+		sb->s_d_op = &cifs_dentry_ops;
+
 	sb->s_root = d_make_root(inode);
 	if (!sb->s_root) {
 		rc = -ENOMEM;
 		goto out_no_root;
 	}
 
-	/* do that *after* d_make_root() - we want NULL ->d_op for root here */
-	if (cifs_sb_master_tcon(cifs_sb)->nocase)
-		sb->s_d_op = &cifs_ci_dentry_ops;
-	else
-		sb->s_d_op = &cifs_dentry_ops;
-
 #ifdef CONFIG_CIFS_NFSD_EXPORT
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
 		cifs_dbg(FYI, "export ops supported\n");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1fdc370..52ca861 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -44,6 +44,7 @@
 #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
 #define MAX_SERVER_SIZE 15
 #define MAX_SHARE_SIZE 80
+#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */
 #define MAX_USERNAME_SIZE 256	/* reasonable maximum for current servers */
 #define MAX_PASSWORD_SIZE 512	/* max for windows seems to be 256 wide chars */
 
@@ -369,6 +370,9 @@
 	void (*generate_signingkey)(struct TCP_Server_Info *server);
 	int (*calc_signature)(struct smb_rqst *rqst,
 				   struct TCP_Server_Info *server);
+	int (*query_mf_symlink)(const unsigned char *path, char *pbuf,
+			unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
+			unsigned int xid);
 };
 
 struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f7e584d..b29a012 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -497,5 +497,7 @@
 struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
 						work_func_t complete);
 void cifs_writedata_release(struct kref *refcount);
-
+int open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
+			unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
+			unsigned int xid);
 #endif			/* _CIFSPROTO_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index fa68813..d67c550 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1675,7 +1675,8 @@
 			if (string == NULL)
 				goto out_nomem;
 
-			if (strnlen(string, 256) == 256) {
+			if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
+					== CIFS_MAX_DOMAINNAME_LEN) {
 				printk(KERN_WARNING "CIFS: domain name too"
 						    " long\n");
 				goto cifs_parse_mount_err;
@@ -2276,8 +2277,8 @@
 
 #ifdef CONFIG_KEYS
 
-/* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */
-#define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1)
+/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */
+#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1)
 
 /* Populate username and pw fields from keyring if possible */
 static int
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1e57f36..7e36ae3 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -647,6 +647,7 @@
 				     oflags, &oplock, &cfile->fid.netfid, xid);
 		if (rc == 0) {
 			cifs_dbg(FYI, "posix reopen succeeded\n");
+			oparms.reconnect = true;
 			goto reopen_success;
 		}
 		/*
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index b83c3f5..562044f 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -305,67 +305,89 @@
 }
 
 int
-CIFSCheckMFSymlink(struct cifs_fattr *fattr,
-		   const unsigned char *path,
-		   struct cifs_sb_info *cifs_sb, unsigned int xid)
+open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
+			unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
+			unsigned int xid)
 {
 	int rc;
 	int oplock = 0;
 	__u16 netfid = 0;
 	struct tcon_link *tlink;
-	struct cifs_tcon *pTcon;
+	struct cifs_tcon *ptcon;
 	struct cifs_io_parms io_parms;
-	u8 *buf;
-	char *pbuf;
-	unsigned int bytes_read = 0;
 	int buf_type = CIFS_NO_BUFFER;
-	unsigned int link_len = 0;
 	FILE_ALL_INFO file_info;
 
-	if (!CIFSCouldBeMFSymlink(fattr))
-		/* it's not a symlink */
-		return 0;
-
 	tlink = cifs_sb_tlink(cifs_sb);
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
-	pTcon = tlink_tcon(tlink);
+	ptcon = tlink_tcon(tlink);
 
-	rc = CIFSSMBOpen(xid, pTcon, path, FILE_OPEN, GENERIC_READ,
+	rc = CIFSSMBOpen(xid, ptcon, path, FILE_OPEN, GENERIC_READ,
 			 CREATE_NOT_DIR, &netfid, &oplock, &file_info,
 			 cifs_sb->local_nls,
 			 cifs_sb->mnt_cifs_flags &
 				CIFS_MOUNT_MAP_SPECIAL_CHR);
-	if (rc != 0)
-		goto out;
+	if (rc != 0) {
+		cifs_put_tlink(tlink);
+		return rc;
+	}
 
 	if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
-		CIFSSMBClose(xid, pTcon, netfid);
+		CIFSSMBClose(xid, ptcon, netfid);
+		cifs_put_tlink(tlink);
 		/* it's not a symlink */
-		goto out;
+		return rc;
 	}
 
+	io_parms.netfid = netfid;
+	io_parms.pid = current->tgid;
+	io_parms.tcon = ptcon;
+	io_parms.offset = 0;
+	io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
+
+	rc = CIFSSMBRead(xid, &io_parms, pbytes_read, &pbuf, &buf_type);
+	CIFSSMBClose(xid, ptcon, netfid);
+	cifs_put_tlink(tlink);
+	return rc;
+}
+
+
+int
+CIFSCheckMFSymlink(struct cifs_fattr *fattr,
+		   const unsigned char *path,
+		   struct cifs_sb_info *cifs_sb, unsigned int xid)
+{
+	int rc = 0;
+	u8 *buf = NULL;
+	unsigned int link_len = 0;
+	unsigned int bytes_read = 0;
+	struct cifs_tcon *ptcon;
+
+	if (!CIFSCouldBeMFSymlink(fattr))
+		/* it's not a symlink */
+		return 0;
+
 	buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
 	if (!buf) {
 		rc = -ENOMEM;
 		goto out;
 	}
-	pbuf = buf;
-	io_parms.netfid = netfid;
-	io_parms.pid = current->tgid;
-	io_parms.tcon = pTcon;
-	io_parms.offset = 0;
-	io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
 
-	rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type);
-	CIFSSMBClose(xid, pTcon, netfid);
-	if (rc != 0) {
-		kfree(buf);
+	ptcon = tlink_tcon(cifs_sb_tlink(cifs_sb));
+	if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink))
+		rc = ptcon->ses->server->ops->query_mf_symlink(path, buf,
+						 &bytes_read, cifs_sb, xid);
+	else
 		goto out;
-	}
+
+	if (rc != 0)
+		goto out;
+
+	if (bytes_read == 0) /* not a symlink */
+		goto out;
 
 	rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, NULL);
-	kfree(buf);
 	if (rc == -EINVAL) {
 		/* it's not a symlink */
 		rc = 0;
@@ -381,7 +403,7 @@
 	fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO;
 	fattr->cf_dtype = DT_LNK;
 out:
-	cifs_put_tlink(tlink);
+	kfree(buf);
 	return rc;
 }
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ab87784..69d2c82 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -111,6 +111,14 @@
 			return;
 	}
 
+	/*
+	 * If we know that the inode will need to be revalidated immediately,
+	 * then don't create a new dentry for it. We'll end up doing an on
+	 * the wire call either way and this spares us an invalidation.
+	 */
+	if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
+		return;
+
 	dentry = d_alloc(parent, name);
 	if (!dentry)
 		return;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 79358e3..08dd37b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -197,7 +197,7 @@
 		bytes_ret = 0;
 	} else
 		bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName,
-					    256, nls_cp);
+					    CIFS_MAX_DOMAINNAME_LEN, nls_cp);
 	bcc_ptr += 2 * bytes_ret;
 	bcc_ptr += 2;  /* account for null terminator */
 
@@ -255,8 +255,8 @@
 
 	/* copy domain */
 	if (ses->domainName != NULL) {
-		strncpy(bcc_ptr, ses->domainName, 256);
-		bcc_ptr += strnlen(ses->domainName, 256);
+		strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+		bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
 	} /* else we will send a null domain name
 	     so the server will default to its own domain */
 	*bcc_ptr = 0;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 6457690..6094397 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -944,6 +944,7 @@
 	.mand_lock = cifs_mand_lock,
 	.mand_unlock_range = cifs_unlock_range,
 	.push_mand_locks = cifs_push_mandatory_locks,
+	.query_mf_symlink = open_query_close_cifs_symlink,
 };
 
 struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 301b191..4f2300d 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -42,6 +42,7 @@
 static int
 smb2_crypto_shash_allocate(struct TCP_Server_Info *server)
 {
+	int rc;
 	unsigned int size;
 
 	if (server->secmech.sdeschmacsha256 != NULL)
@@ -50,7 +51,9 @@
 	server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0);
 	if (IS_ERR(server->secmech.hmacsha256)) {
 		cifs_dbg(VFS, "could not allocate crypto hmacsha256\n");
-		return PTR_ERR(server->secmech.hmacsha256);
+		rc = PTR_ERR(server->secmech.hmacsha256);
+		server->secmech.hmacsha256 = NULL;
+		return rc;
 	}
 
 	size = sizeof(struct shash_desc) +
@@ -87,7 +90,9 @@
 		server->secmech.sdeschmacsha256 = NULL;
 		crypto_free_shash(server->secmech.hmacsha256);
 		server->secmech.hmacsha256 = NULL;
-		return PTR_ERR(server->secmech.cmacaes);
+		rc = PTR_ERR(server->secmech.cmacaes);
+		server->secmech.cmacaes = NULL;
+		return rc;
 	}
 
 	size = sizeof(struct shash_desc) +
diff --git a/fs/dcache.c b/fs/dcache.c
index 87bdb53..83cfb83 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2724,6 +2724,17 @@
 	return memcpy(buffer, temp, sz);
 }
 
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+	char *end = buffer + buflen;
+	/* these dentries are never renamed, so d_lock is not needed */
+	if (prepend(&end, &buflen, " (deleted)", 11) ||
+	    prepend_name(&end, &buflen, &dentry->d_name) ||
+	    prepend(&end, &buflen, "/", 1))  
+		end = ERR_PTR(-ENAMETOOLONG);
+	return end;  
+}
+
 /*
  * Write full pathname from the root of the filesystem into the buffer.
  */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4888cb3..c7c83ff 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -533,8 +533,7 @@
  */
 void debugfs_remove_recursive(struct dentry *dentry)
 {
-	struct dentry *child;
-	struct dentry *parent;
+	struct dentry *child, *next, *parent;
 
 	if (IS_ERR_OR_NULL(dentry))
 		return;
@@ -544,61 +543,37 @@
 		return;
 
 	parent = dentry;
+ down:
 	mutex_lock(&parent->d_inode->i_mutex);
+	list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
+		if (!debugfs_positive(child))
+			continue;
 
-	while (1) {
-		/*
-		 * When all dentries under "parent" has been removed,
-		 * walk up the tree until we reach our starting point.
-		 */
-		if (list_empty(&parent->d_subdirs)) {
-			mutex_unlock(&parent->d_inode->i_mutex);
-			if (parent == dentry)
-				break;
-			parent = parent->d_parent;
-			mutex_lock(&parent->d_inode->i_mutex);
-		}
-		child = list_entry(parent->d_subdirs.next, struct dentry,
-				d_u.d_child);
- next_sibling:
-
-		/*
-		 * If "child" isn't empty, walk down the tree and
-		 * remove all its descendants first.
-		 */
+		/* perhaps simple_empty(child) makes more sense */
 		if (!list_empty(&child->d_subdirs)) {
 			mutex_unlock(&parent->d_inode->i_mutex);
 			parent = child;
-			mutex_lock(&parent->d_inode->i_mutex);
-			continue;
+			goto down;
 		}
-		__debugfs_remove(child, parent);
-		if (parent->d_subdirs.next == &child->d_u.d_child) {
-			/*
-			 * Try the next sibling.
-			 */
-			if (child->d_u.d_child.next != &parent->d_subdirs) {
-				child = list_entry(child->d_u.d_child.next,
-						   struct dentry,
-						   d_u.d_child);
-				goto next_sibling;
-			}
-
-			/*
-			 * Avoid infinite loop if we fail to remove
-			 * one dentry.
-			 */
-			mutex_unlock(&parent->d_inode->i_mutex);
-			break;
-		}
-		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+ up:
+		if (!__debugfs_remove(child, parent))
+			simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 	}
 
-	parent = dentry->d_parent;
-	mutex_lock(&parent->d_inode->i_mutex);
-	__debugfs_remove(dentry, parent);
 	mutex_unlock(&parent->d_inode->i_mutex);
-	simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+	child = parent;
+	parent = parent->d_parent;
+	mutex_lock(&parent->d_inode->i_mutex);
+
+	if (child != dentry) {
+		next = list_entry(child->d_u.d_child.next, struct dentry,
+					d_u.d_child);
+		goto up;
+	}
+
+	if (!__debugfs_remove(child, parent))
+		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+	mutex_unlock(&parent->d_inode->i_mutex);
 }
 EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
 
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 911649a..81214911 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -686,7 +686,6 @@
 	   device_remove_lockspace() */
 
 	sigprocmask(SIG_SETMASK, &tmpsig, NULL);
-	recalc_sigpending();
 
 	return 0;
 }
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index f3913eb..d15ccf2 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -57,7 +57,7 @@
 	struct inode *inode;
 
 	inode = iget_locked(super, ino);
-	if (IS_ERR(inode))
+	if (!inode)
 		return ERR_PTR(-ENOMEM);
 	if (!(inode->i_state & I_NEW))
 		return inode;
diff --git a/fs/exec.c b/fs/exec.c
index 9c73def..fd774c7 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -608,7 +608,7 @@
 		return -ENOMEM;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, old_start, old_end);
 	if (new_end > old_start) {
 		/*
 		 * when the old and new regions overlap clear from new_end.
@@ -625,7 +625,7 @@
 		free_pgd_range(&tlb, old_start, old_end, new_end,
 			vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
 	}
-	tlb_finish_mmu(&tlb, new_end, old_end);
+	tlb_finish_mmu(&tlb, old_start, old_end);
 
 	/*
 	 * Shrink the vma to just the new range.  Always succeeds.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b577e45..0ab26fb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2086,6 +2086,7 @@
 extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
+extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate(struct inode *);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 72a3600..17ac112 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -255,10 +255,10 @@
 	set_buffer_prio(bh);
 	if (ext4_handle_valid(handle)) {
 		err = jbd2_journal_dirty_metadata(handle, bh);
-		if (err) {
-			/* Errors can only happen if there is a bug */
-			handle->h_err = err;
-			__ext4_journal_stop(where, line, handle);
+		/* Errors can only happen if there is a bug */
+		if (WARN_ON_ONCE(err)) {
+			ext4_journal_abort_handle(where, line, __func__, bh,
+						  handle, err);
 		}
 	} else {
 		if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index a618738..72ba470 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4412,7 +4412,7 @@
 retry:
 	err = ext4_es_remove_extent(inode, last_block,
 				    EXT_MAX_BLOCKS - last_block);
-	if (err == ENOMEM) {
+	if (err == -ENOMEM) {
 		cond_resched();
 		congestion_wait(BLK_RW_ASYNC, HZ/50);
 		goto retry;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6f4cc56..319c9d2 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -219,7 +219,6 @@
 {
 	struct super_block *sb = inode->i_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct vfsmount *mnt = filp->f_path.mnt;
 	struct path path;
 	char buf[64], *cp;
@@ -259,22 +258,10 @@
 	 * Set up the jbd2_inode if we are opening the inode for
 	 * writing and the journal is present
 	 */
-	if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
-		struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
-
-		spin_lock(&inode->i_lock);
-		if (!ei->jinode) {
-			if (!jinode) {
-				spin_unlock(&inode->i_lock);
-				return -ENOMEM;
-			}
-			ei->jinode = jinode;
-			jbd2_journal_init_jbd_inode(ei->jinode, inode);
-			jinode = NULL;
-		}
-		spin_unlock(&inode->i_lock);
-		if (unlikely(jinode != NULL))
-			jbd2_free_inode(jinode);
+	if (filp->f_mode & FMODE_WRITE) {
+		int ret = ext4_inode_attach_jinode(inode);
+		if (ret < 0)
+			return ret;
 	}
 	return dquot_file_open(inode, filp);
 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f03598c..8bf5999 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -734,11 +734,8 @@
 		ino = ext4_find_next_zero_bit((unsigned long *)
 					      inode_bitmap_bh->b_data,
 					      EXT4_INODES_PER_GROUP(sb), ino);
-		if (ino >= EXT4_INODES_PER_GROUP(sb)) {
-			if (++group == ngroups)
-				group = 0;
-			continue;
-		}
+		if (ino >= EXT4_INODES_PER_GROUP(sb))
+			goto next_group;
 		if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
 			ext4_error(sb, "reserved inode found cleared - "
 				   "inode=%lu", ino + 1);
@@ -769,6 +766,9 @@
 			goto got; /* we grabbed the inode! */
 		if (ino < EXT4_INODES_PER_GROUP(sb))
 			goto repeat_in_this_group;
+next_group:
+		if (++group == ngroups)
+			group = 0;
 	}
 	err = -ENOSPC;
 	goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ba33c67..c2ca04e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -555,14 +555,13 @@
 		int ret;
 		unsigned long long status;
 
-#ifdef ES_AGGRESSIVE_TEST
-		if (retval != map->m_len) {
-			printk("ES len assertion failed for inode: %lu "
-			       "retval %d != map->m_len %d "
-			       "in %s (lookup)\n", inode->i_ino, retval,
-			       map->m_len, __func__);
+		if (unlikely(retval != map->m_len)) {
+			ext4_warning(inode->i_sb,
+				     "ES len assertion failed for inode "
+				     "%lu: retval %d != map->m_len %d",
+				     inode->i_ino, retval, map->m_len);
+			WARN_ON(1);
 		}
-#endif
 
 		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
 				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -656,14 +655,13 @@
 		int ret;
 		unsigned long long status;
 
-#ifdef ES_AGGRESSIVE_TEST
-		if (retval != map->m_len) {
-			printk("ES len assertion failed for inode: %lu "
-			       "retval %d != map->m_len %d "
-			       "in %s (allocation)\n", inode->i_ino, retval,
-			       map->m_len, __func__);
+		if (unlikely(retval != map->m_len)) {
+			ext4_warning(inode->i_sb,
+				     "ES len assertion failed for inode "
+				     "%lu: retval %d != map->m_len %d",
+				     inode->i_ino, retval, map->m_len);
+			WARN_ON(1);
 		}
-#endif
 
 		/*
 		 * If the extent has been zeroed out, we don't need to update
@@ -1637,14 +1635,13 @@
 		int ret;
 		unsigned long long status;
 
-#ifdef ES_AGGRESSIVE_TEST
-		if (retval != map->m_len) {
-			printk("ES len assertion failed for inode: %lu "
-			       "retval %d != map->m_len %d "
-			       "in %s (lookup)\n", inode->i_ino, retval,
-			       map->m_len, __func__);
+		if (unlikely(retval != map->m_len)) {
+			ext4_warning(inode->i_sb,
+				     "ES len assertion failed for inode "
+				     "%lu: retval %d != map->m_len %d",
+				     inode->i_ino, retval, map->m_len);
+			WARN_ON(1);
 		}
-#endif
 
 		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
 				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -3536,6 +3533,18 @@
 		   offset;
 	}
 
+	if (offset & (sb->s_blocksize - 1) ||
+	    (offset + length) & (sb->s_blocksize - 1)) {
+		/*
+		 * Attach jinode to inode for jbd2 if we do any zeroing of
+		 * partial block
+		 */
+		ret = ext4_inode_attach_jinode(inode);
+		if (ret < 0)
+			goto out_mutex;
+
+	}
+
 	first_block_offset = round_up(offset, sb->s_blocksize);
 	last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
 
@@ -3604,6 +3613,31 @@
 	return ret;
 }
 
+int ext4_inode_attach_jinode(struct inode *inode)
+{
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct jbd2_inode *jinode;
+
+	if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal)
+		return 0;
+
+	jinode = jbd2_alloc_inode(GFP_KERNEL);
+	spin_lock(&inode->i_lock);
+	if (!ei->jinode) {
+		if (!jinode) {
+			spin_unlock(&inode->i_lock);
+			return -ENOMEM;
+		}
+		ei->jinode = jinode;
+		jbd2_journal_init_jbd_inode(ei->jinode, inode);
+		jinode = NULL;
+	}
+	spin_unlock(&inode->i_lock);
+	if (unlikely(jinode != NULL))
+		jbd2_free_inode(jinode);
+	return 0;
+}
+
 /*
  * ext4_truncate()
  *
@@ -3664,6 +3698,12 @@
 			return;
 	}
 
+	/* If we zero-out tail of the page, we have to create jinode for jbd2 */
+	if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
+		if (ext4_inode_attach_jinode(inode) < 0)
+			return;
+	}
+
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		credits = ext4_writepage_trans_blocks(inode);
 	else
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9491ac0..c0427e2 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -77,8 +77,10 @@
 	memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
 	memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
 	memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
-	memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree));
-	memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr));
+	ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
+	ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
+	ext4_es_lru_del(inode1);
+	ext4_es_lru_del(inode2);
 
 	isize = i_size_read(inode1);
 	i_size_write(inode1, i_size_read(inode2));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index bca26f3..b59373b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1359,7 +1359,7 @@
 	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
 	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
 	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
-	 MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT},
+	 MOPT_EXT4_ONLY | MOPT_CLEAR},
 	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
 	 MOPT_EXT4_ONLY | MOPT_SET},
 	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
@@ -3483,7 +3483,7 @@
 		}
 		if (test_opt(sb, DIOREAD_NOLOCK)) {
 			ext4_msg(sb, KERN_ERR, "can't mount with "
-				 "both data=journal and delalloc");
+				 "both data=journal and dioread_nolock");
 			goto failed_mount;
 		}
 		if (test_opt(sb, DELALLOC))
@@ -4727,6 +4727,21 @@
 		goto restore_opts;
 	}
 
+	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+			ext4_msg(sb, KERN_ERR, "can't mount with "
+				 "both data=journal and delalloc");
+			err = -EINVAL;
+			goto restore_opts;
+		}
+		if (test_opt(sb, DIOREAD_NOLOCK)) {
+			ext4_msg(sb, KERN_ERR, "can't mount with "
+				 "both data=journal and dioread_nolock");
+			err = -EINVAL;
+			goto restore_opts;
+		}
+	}
+
 	if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
 		ext4_abort(sb, "Abort forced by user");
 
@@ -5481,6 +5496,7 @@
 	kset_unregister(ext4_kset);
 	ext4_exit_system_zone();
 	ext4_exit_pageio();
+	ext4_exit_es();
 }
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6599222..65343c3 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -730,14 +730,14 @@
 	 * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
 	 * is defined as O_NONBLOCK on some platforms and not on others.
 	 */
-	BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+	BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
 		O_RDONLY	| O_WRONLY	| O_RDWR	|
 		O_CREAT		| O_EXCL	| O_NOCTTY	|
 		O_TRUNC		| O_APPEND	| /* O_NONBLOCK	| */
 		__O_SYNC	| O_DSYNC	| FASYNC	|
 		O_DIRECT	| O_LARGEFILE	| O_DIRECTORY	|
 		O_NOFOLLOW	| O_NOATIME	| O_CLOEXEC	|
-		__FMODE_EXEC	| O_PATH
+		__FMODE_EXEC	| O_PATH	| __O_TMPFILE
 		));
 
 	fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0eda527..72a5d5b 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1223,30 +1223,46 @@
 		if (name.name[1] == '.' && name.len == 2)
 			return 0;
 	}
+
+	if (invalid_nodeid(o->nodeid))
+		return -EIO;
+	if (!fuse_valid_type(o->attr.mode))
+		return -EIO;
+
 	fc = get_fuse_conn(dir);
 
 	name.hash = full_name_hash(name.name, name.len);
 	dentry = d_lookup(parent, &name);
-	if (dentry && dentry->d_inode) {
+	if (dentry) {
 		inode = dentry->d_inode;
-		if (get_node_id(inode) == o->nodeid) {
+		if (!inode) {
+			d_drop(dentry);
+		} else if (get_node_id(inode) != o->nodeid ||
+			   ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+			err = d_invalidate(dentry);
+			if (err)
+				goto out;
+		} else if (is_bad_inode(inode)) {
+			err = -EIO;
+			goto out;
+		} else {
 			struct fuse_inode *fi;
 			fi = get_fuse_inode(inode);
 			spin_lock(&fc->lock);
 			fi->nlookup++;
 			spin_unlock(&fc->lock);
 
+			fuse_change_attributes(inode, &o->attr,
+					       entry_attr_timeout(o),
+					       attr_version);
+
 			/*
 			 * The other branch to 'found' comes via fuse_iget()
 			 * which bumps nlookup inside
 			 */
 			goto found;
 		}
-		err = d_invalidate(dentry);
-		if (err)
-			goto out;
 		dput(dentry);
-		dentry = NULL;
 	}
 
 	dentry = d_alloc(parent, &name);
@@ -1259,25 +1275,30 @@
 	if (!inode)
 		goto out;
 
-	alias = d_materialise_unique(dentry, inode);
-	err = PTR_ERR(alias);
-	if (IS_ERR(alias))
-		goto out;
+	if (S_ISDIR(inode->i_mode)) {
+		mutex_lock(&fc->inst_mutex);
+		alias = fuse_d_add_directory(dentry, inode);
+		mutex_unlock(&fc->inst_mutex);
+		err = PTR_ERR(alias);
+		if (IS_ERR(alias)) {
+			iput(inode);
+			goto out;
+		}
+	} else {
+		alias = d_splice_alias(inode, dentry);
+	}
+
 	if (alias) {
 		dput(dentry);
 		dentry = alias;
 	}
 
 found:
-	fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
-			       attr_version);
-
 	fuse_change_entry_timeout(dentry, o);
 
 	err = 0;
 out:
-	if (dentry)
-		dput(dentry);
+	dput(dentry);
 	return err;
 }
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9435384..544a809 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1838,14 +1838,14 @@
 
 	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
 					  WQ_HIGHPRI | WQ_FREEZABLE, 0);
-	if (IS_ERR(glock_workqueue))
-		return PTR_ERR(glock_workqueue);
+	if (!glock_workqueue)
+		return -ENOMEM;
 	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
 						WQ_MEM_RECLAIM | WQ_FREEZABLE,
 						0);
-	if (IS_ERR(gfs2_delete_workqueue)) {
+	if (!gfs2_delete_workqueue) {
 		destroy_workqueue(glock_workqueue);
-		return PTR_ERR(gfs2_delete_workqueue);
+		return -ENOMEM;
 	}
 
 	register_shrinker(&glock_shrinker);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5f2e522..e2e0a90 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -47,7 +47,8 @@
  * None of the buffers should be dirty, locked, or pinned.
  */
 
-static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
+static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
+			     unsigned int nr_revokes)
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	struct list_head *head = &gl->gl_ail_list;
@@ -57,7 +58,9 @@
 
 	gfs2_log_lock(sdp);
 	spin_lock(&sdp->sd_ail_lock);
-	list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
+	list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
+		if (nr_revokes == 0)
+			break;
 		bh = bd->bd_bh;
 		if (bh->b_state & b_state) {
 			if (fsync)
@@ -65,6 +68,7 @@
 			gfs2_ail_error(gl, bh);
 		}
 		gfs2_trans_add_revoke(sdp, bd);
+		nr_revokes--;
 	}
 	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
 	spin_unlock(&sdp->sd_ail_lock);
@@ -91,7 +95,7 @@
 	WARN_ON_ONCE(current->journal_info);
 	current->journal_info = &tr;
 
-	__gfs2_ail_flush(gl, 0);
+	__gfs2_ail_flush(gl, 0, tr.tr_revokes);
 
 	gfs2_trans_end(sdp);
 	gfs2_log_flush(sdp, NULL);
@@ -101,15 +105,19 @@
 {
 	struct gfs2_sbd *sdp = gl->gl_sbd;
 	unsigned int revokes = atomic_read(&gl->gl_ail_count);
+	unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
 	int ret;
 
 	if (!revokes)
 		return;
 
-	ret = gfs2_trans_begin(sdp, 0, revokes);
+	while (revokes > max_revokes)
+		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
+
+	ret = gfs2_trans_begin(sdp, 0, max_revokes);
 	if (ret)
 		return;
-	__gfs2_ail_flush(gl, fsync);
+	__gfs2_ail_flush(gl, fsync, max_revokes);
 	gfs2_trans_end(sdp);
 	gfs2_log_flush(sdp, NULL);
 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index bbb2715..64915ee 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -594,7 +594,7 @@
 		}
 		gfs2_glock_dq_uninit(ghs);
 		if (IS_ERR(d))
-			return PTR_RET(d);
+			return PTR_ERR(d);
 		return error;
 	} else if (error != -ENOENT) {
 		goto fail_gunlock;
@@ -1750,6 +1750,10 @@
 	struct gfs2_holder gh;
 	int ret;
 
+	/* For selinux during lookup */
+	if (gfs2_glock_is_locked_by_me(ip->i_gl))
+		return generic_getxattr(dentry, name, data, size);
+
 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
 	ret = gfs2_glock_nq(&gh);
 	if (ret == 0) {
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index e04d0e0..7b0f504 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -155,7 +155,7 @@
 		goto fail_wq;
 
 	gfs2_control_wq = alloc_workqueue("gfs2_control",
-			       WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
+					  WQ_UNBOUND | WQ_FREEZABLE, 0);
 	if (!gfs2_control_wq)
 		goto fail_recovery;
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a3f868a..d19b30a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -463,6 +463,14 @@
 	return inode;
 }
 
+/*
+ * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
+ * be taken from reclaim -- unlike regular filesystems. This needs an
+ * annotation because huge_pmd_share() does an allocation under
+ * i_mmap_mutex.
+ */
+struct lock_class_key hugetlbfs_i_mmap_mutex_key;
+
 static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 					struct inode *dir,
 					umode_t mode, dev_t dev)
@@ -474,6 +482,8 @@
 		struct hugetlbfs_inode_info *info;
 		inode->i_ino = get_next_ino();
 		inode_init_owner(inode, dir, mode);
+		lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
+				&hugetlbfs_i_mmap_mutex_key);
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -916,14 +926,8 @@
 	return h - hstates;
 }
 
-static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen)
-{
-	return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
-				dentry->d_name.name);
-}
-
 static struct dentry_operations anon_ops = {
-	.d_dname = hugetlb_dname
+	.d_dname = simple_dname
 };
 
 /*
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 01bfe766..41e491b 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -64,12 +64,17 @@
 				   nlm_init->protocol, nlm_version,
 				   nlm_init->hostname, nlm_init->noresvport,
 				   nlm_init->net);
-	if (host == NULL) {
-		lockd_down(nlm_init->net);
-		return ERR_PTR(-ENOLCK);
-	}
+	if (host == NULL)
+		goto out_nohost;
+	if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
+		goto out_nobind;
 
 	return host;
+out_nobind:
+	nlmclnt_release_host(host);
+out_nohost:
+	lockd_down(nlm_init->net);
+	return ERR_PTR(-ENOLCK);
 }
 EXPORT_SYMBOL_GPL(nlmclnt_init);
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 9760ecb..acd3947 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -125,14 +125,15 @@
 {
 	struct nlm_args	*argp = &req->a_args;
 	struct nlm_lock	*lock = &argp->lock;
+	char *nodename = req->a_host->h_rpcclnt->cl_nodename;
 
 	nlmclnt_next_cookie(&argp->cookie);
 	memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
-	lock->caller  = utsname()->nodename;
+	lock->caller  = nodename;
 	lock->oh.data = req->a_owner;
 	lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
 				(unsigned int)fl->fl_u.nfs_fl.owner->pid,
-				utsname()->nodename);
+				nodename);
 	lock->svid = fl->fl_u.nfs_fl.owner->pid;
 	lock->fl.fl_start = fl->fl_start;
 	lock->fl.fl_end = fl->fl_end;
diff --git a/fs/namei.c b/fs/namei.c
index 8b61d10..89a612e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3671,15 +3671,11 @@
 	if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
 		return -EINVAL;
 	/*
-	 * To use null names we require CAP_DAC_READ_SEARCH
-	 * This ensures that not everyone will be able to create
-	 * handlink using the passed filedescriptor.
+	 * Using empty names is equivalent to using AT_SYMLINK_FOLLOW
+	 * on /proc/self/fd/<fd>.
 	 */
-	if (flags & AT_EMPTY_PATH) {
-		if (!capable(CAP_DAC_READ_SEARCH))
-			return -ENOENT;
+	if (flags & AT_EMPTY_PATH)
 		how = LOOKUP_EMPTY;
-	}
 
 	if (flags & AT_SYMLINK_FOLLOW)
 		how |= LOOKUP_FOLLOW;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9b..a45ba4f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1429,7 +1429,7 @@
 			 CL_COPY_ALL | CL_PRIVATE);
 	namespace_unlock();
 	if (IS_ERR(tree))
-		return NULL;
+		return ERR_CAST(tree);
 	return &tree->mnt;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index af6e806..941246f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -463,7 +463,6 @@
 		unlock_new_inode(inode);
 	} else
 		nfs_refresh_inode(inode, fattr);
-		nfs_setsecurity(inode, fattr, label);
 	dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
 		inode->i_sb->s_id,
 		(long long)NFS_FILEID(inode),
@@ -963,9 +962,15 @@
 static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
-	
+	int ret;
+
 	if (mapping->nrpages != 0) {
-		int ret = invalidate_inode_pages2(mapping);
+		if (S_ISREG(inode->i_mode)) {
+			ret = nfs_sync_mapping(mapping);
+			if (ret < 0)
+				return ret;
+		}
+		ret = invalidate_inode_pages2(mapping);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf11799..108a774 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3071,15 +3071,13 @@
 nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name,
 			    struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
+	struct rpc_clnt *client = NFS_CLIENT(dir);
 	int status;
-	struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir));
 
 	status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL);
-	if (status < 0) {
-		rpc_shutdown_client(client);
+	if (status < 0)
 		return ERR_PTR(status);
-	}
-	return client;
+	return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
 }
 
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c74d616..3850b01 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1118,11 +1118,11 @@
 				len, ((char *)p - (char *)q) + 4);
 		BUG();
 	}
-	len = (char *)p - (char *)q - (bmval_len << 2);
 	*q++ = htonl(bmval0);
 	*q++ = htonl(bmval1);
 	if (bmval_len == 3)
 		*q++ = htonl(bmval2);
+	len = (char *)p - (char *)(q + 1);
 	*q = htonl(len);
 
 /* out: */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 71fdc0d..f6db66d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2478,6 +2478,10 @@
 	if (server->flags & NFS_MOUNT_NOAC)
 		sb_mntdata.mntflags |= MS_SYNCHRONOUS;
 
+	if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
+		if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS)
+			sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+
 	/* Get a superblock - note that we may end up sharing one that already exists */
 	s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
 	if (IS_ERR(s)) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0d4c410..419572f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1524,7 +1524,7 @@
 static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
 	return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
-		1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\
+		1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\
 		2 + /*eir_server_owner.so_minor_id */\
 		/* eir_server_owner.so_major_id<> */\
 		XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 280acef..43f4229 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1264,6 +1264,8 @@
 	struct svc_cred *cr = &rqstp->rq_cred;
 	u32 service;
 
+	if (!cr->cr_gss_mech)
+		return false;
 	service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
 	return service == RPC_GSS_SVC_INTEGRITY ||
 	       service == RPC_GSS_SVC_PRIVACY;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0c0f3ea9..c2a4701 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3360,7 +3360,8 @@
 		8 /* eir_clientid */ +
 		4 /* eir_sequenceid */ +
 		4 /* eir_flags */ +
-		4 /* spr_how (SP4_NONE) */ +
+		4 /* spr_how */ +
+		8 /* spo_must_enforce, spo_must_allow */ +
 		8 /* so_minor_id */ +
 		4 /* so_major_id.len */ +
 		(XDR_QUADLEN(major_id_sz) * 4) +
@@ -3372,8 +3373,6 @@
 	WRITE32(exid->seqid);
 	WRITE32(exid->flags);
 
-	/* state_protect4_r. Currently only support SP4_NONE */
-	BUG_ON(exid->spa_how != SP4_NONE);
 	WRITE32(exid->spa_how);
 	switch (exid->spa_how) {
 	case SP4_NONE:
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8ff6a00..c827acb 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -830,9 +830,10 @@
 			flags = O_WRONLY|O_LARGEFILE;
 	}
 	*filp = dentry_open(&path, flags, current_cred());
-	if (IS_ERR(*filp))
+	if (IS_ERR(*filp)) {
 		host_err = PTR_ERR(*filp);
-	else {
+		*filp = NULL;
+	} else {
 		host_err = ima_file_check(*filp, may_flags);
 
 		if (may_flags & NFSD_MAY_64BIT_COOKIE)
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index dc9a913..2d8be51 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -345,8 +345,7 @@
 
 	if (err == -EOPNOTSUPP) {
 		set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		bio_put(bio);
-		/* to be detected by submit_seg_bio() */
+		/* to be detected by nilfs_segbuf_submit_bio() */
 	}
 
 	if (!uptodate)
@@ -377,12 +376,12 @@
 	bio->bi_private = segbuf;
 	bio_get(bio);
 	submit_bio(mode, bio);
+	segbuf->sb_nbio++;
 	if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
 		bio_put(bio);
 		err = -EOPNOTSUPP;
 		goto failed;
 	}
-	segbuf->sb_nbio++;
 	bio_put(bio);
 
 	wi->bio = NULL;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 79736a2..2abf97b 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1757,7 +1757,7 @@
 		goto out;
 	} else if (ret == 1) {
 		clusters_need = wc->w_clen;
-		ret = ocfs2_refcount_cow(inode, filp, di_bh,
+		ret = ocfs2_refcount_cow(inode, di_bh,
 					 wc->w_cpos, wc->w_clen, UINT_MAX);
 		if (ret) {
 			mlog_errno(ret);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index eb760d8..30544ce 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2153,11 +2153,9 @@
 {
 	int ret;
 	struct ocfs2_empty_dir_priv priv = {
-		.ctx.actor = ocfs2_empty_dir_filldir
+		.ctx.actor = ocfs2_empty_dir_filldir,
 	};
 
-	memset(&priv, 0, sizeof(priv));
-
 	if (ocfs2_dir_indexed(inode)) {
 		ret = ocfs2_empty_dir_dx(inode, &priv);
 		if (ret)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41000f2..3261d71 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -370,7 +370,7 @@
 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
 		goto out;
 
-	return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
+	return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
 
 out:
 	return status;
@@ -899,7 +899,7 @@
 		zero_clusters = last_cpos - zero_cpos;
 
 	if (needs_cow) {
-		rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
+		rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
 					zero_clusters, UINT_MAX);
 		if (rc) {
 			mlog_errno(rc);
@@ -2078,7 +2078,7 @@
 
 	*meta_level = 1;
 
-	ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
+	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
 	if (ret)
 		mlog_errno(ret);
 out:
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 96f9ac2..0a99273 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -537,7 +537,7 @@
 	extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
 
 	return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
-	       ocfs2_quota_trans_credits(sb) + bits_wanted;
+	       ocfs2_quota_trans_credits(sb);
 }
 
 static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index f1fc172..452068b 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -69,7 +69,7 @@
 	u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
 	u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
 
-	ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos,
+	ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
 					       p_cpos, new_p_cpos, len);
 	if (ret) {
 		mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 998b17e..a70d604 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -49,7 +49,6 @@
 
 struct ocfs2_cow_context {
 	struct inode *inode;
-	struct file *file;
 	u32 cow_start;
 	u32 cow_len;
 	struct ocfs2_extent_tree data_et;
@@ -66,7 +65,7 @@
 			    u32 *num_clusters,
 			    unsigned int *extent_flags);
 	int (*cow_duplicate_clusters)(handle_t *handle,
-				      struct file *file,
+				      struct inode *inode,
 				      u32 cpos, u32 old_cluster,
 				      u32 new_cluster, u32 new_len);
 };
@@ -2922,14 +2921,12 @@
 }
 
 int ocfs2_duplicate_clusters_by_page(handle_t *handle,
-				     struct file *file,
+				     struct inode *inode,
 				     u32 cpos, u32 old_cluster,
 				     u32 new_cluster, u32 new_len)
 {
 	int ret = 0, partial;
-	struct inode *inode = file_inode(file);
-	struct ocfs2_caching_info *ci = INODE_CACHE(inode);
-	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct super_block *sb = inode->i_sb;
 	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
 	struct page *page;
 	pgoff_t page_index;
@@ -2965,6 +2962,11 @@
 			to = map_end & (PAGE_CACHE_SIZE - 1);
 
 		page = find_or_create_page(mapping, page_index, GFP_NOFS);
+		if (!page) {
+			ret = -ENOMEM;
+			mlog_errno(ret);
+			break;
+		}
 
 		/*
 		 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
@@ -2973,13 +2975,6 @@
 		if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
 			BUG_ON(PageDirty(page));
 
-		if (PageReadahead(page)) {
-			page_cache_async_readahead(mapping,
-						   &file->f_ra, file,
-						   page, page_index,
-						   readahead_pages);
-		}
-
 		if (!PageUptodate(page)) {
 			ret = block_read_full_page(page, ocfs2_get_block);
 			if (ret) {
@@ -2999,7 +2994,8 @@
 			}
 		}
 
-		ocfs2_map_and_dirty_page(inode, handle, from, to,
+		ocfs2_map_and_dirty_page(inode,
+					 handle, from, to,
 					 page, 0, &new_block);
 		mark_page_accessed(page);
 unlock:
@@ -3015,12 +3011,11 @@
 }
 
 int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
-				    struct file *file,
+				    struct inode *inode,
 				    u32 cpos, u32 old_cluster,
 				    u32 new_cluster, u32 new_len)
 {
 	int ret = 0;
-	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	struct ocfs2_caching_info *ci = INODE_CACHE(inode);
 	int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
@@ -3145,7 +3140,7 @@
 
 	/*If the old clusters is unwritten, no need to duplicate. */
 	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
-		ret = context->cow_duplicate_clusters(handle, context->file,
+		ret = context->cow_duplicate_clusters(handle, context->inode,
 						      cpos, old, new, len);
 		if (ret) {
 			mlog_errno(ret);
@@ -3423,35 +3418,12 @@
 	return ret;
 }
 
-static void ocfs2_readahead_for_cow(struct inode *inode,
-				    struct file *file,
-				    u32 start, u32 len)
-{
-	struct address_space *mapping;
-	pgoff_t index;
-	unsigned long num_pages;
-	int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
-
-	if (!file)
-		return;
-
-	mapping = file->f_mapping;
-	num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
-	if (!num_pages)
-		num_pages = 1;
-
-	index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
-	page_cache_sync_readahead(mapping, &file->f_ra, file,
-				  index, num_pages);
-}
-
 /*
  * Starting at cpos, try to CoW write_len clusters.  Don't CoW
  * past max_cpos.  This will stop when it runs into a hole or an
  * unrefcounted extent.
  */
 static int ocfs2_refcount_cow_hunk(struct inode *inode,
-				   struct file *file,
 				   struct buffer_head *di_bh,
 				   u32 cpos, u32 write_len, u32 max_cpos)
 {
@@ -3480,8 +3452,6 @@
 
 	BUG_ON(cow_len == 0);
 
-	ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
-
 	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
 	if (!context) {
 		ret = -ENOMEM;
@@ -3503,7 +3473,6 @@
 	context->ref_root_bh = ref_root_bh;
 	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
 	context->get_clusters = ocfs2_di_get_clusters;
-	context->file = file;
 
 	ocfs2_init_dinode_extent_tree(&context->data_et,
 				      INODE_CACHE(inode), di_bh);
@@ -3532,7 +3501,6 @@
  * clusters between cpos and cpos+write_len are safe to modify.
  */
 int ocfs2_refcount_cow(struct inode *inode,
-		       struct file *file,
 		       struct buffer_head *di_bh,
 		       u32 cpos, u32 write_len, u32 max_cpos)
 {
@@ -3552,7 +3520,7 @@
 			num_clusters = write_len;
 
 		if (ext_flags & OCFS2_EXT_REFCOUNTED) {
-			ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
+			ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
 						      num_clusters, max_cpos);
 			if (ret) {
 				mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 7754608..6422bbc 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -53,7 +53,7 @@
 					  int *credits,
 					  int *ref_blocks);
 int ocfs2_refcount_cow(struct inode *inode,
-		       struct file *filep, struct buffer_head *di_bh,
+		       struct buffer_head *di_bh,
 		       u32 cpos, u32 write_len, u32 max_cpos);
 
 typedef int (ocfs2_post_refcount_func)(struct inode *inode,
@@ -85,11 +85,11 @@
 			     u32 cpos, u32 write_len,
 			     struct ocfs2_post_refcount *post);
 int ocfs2_duplicate_clusters_by_page(handle_t *handle,
-				     struct file *file,
+				     struct inode *inode,
 				     u32 cpos, u32 old_cluster,
 				     u32 new_cluster, u32 new_len);
 int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
-				    struct file *file,
+				    struct inode *inode,
 				    u32 cpos, u32 old_cluster,
 				    u32 new_cluster, u32 new_len);
 int ocfs2_cow_sync_writeback(struct super_block *sb,
diff --git a/fs/open.c b/fs/open.c
index d53e298..7931f76 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -823,7 +823,7 @@
 	int lookup_flags = 0;
 	int acc_mode;
 
-	if (flags & O_CREAT)
+	if (flags & (O_CREAT | __O_TMPFILE))
 		op->mode = (mode & S_IALLUGO) | S_IFREG;
 	else
 		op->mode = 0;
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 75f2890..0ff80f9 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -230,8 +230,6 @@
 
 	if (!dir_emit_dots(file, ctx))
 		goto out;
-	if (!dir_emit_dots(file, ctx))
-		goto out;
 	files = get_files_struct(p);
 	if (!files)
 		goto out;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 94441a4..737e156 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -271,7 +271,7 @@
 		de = next;
 	} while (de);
 	spin_unlock(&proc_subdir_lock);
-	return 0;
+	return 1;
 }
 
 int proc_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 229e366..e0a790d 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -205,7 +205,9 @@
 static int proc_root_readdir(struct file *file, struct dir_context *ctx)
 {
 	if (ctx->pos < FIRST_PROCESS_ENTRY) {
-		proc_readdir(file, ctx);
+		int error = proc_readdir(file, ctx);
+		if (unlikely(error <= 0))
+			return error;
 		ctx->pos = FIRST_PROCESS_ENTRY;
 	}
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dbf61f6..107d026 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -730,8 +730,16 @@
 	 * of how soft-dirty works.
 	 */
 	pte_t ptent = *pte;
-	ptent = pte_wrprotect(ptent);
-	ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+
+	if (pte_present(ptent)) {
+		ptent = pte_wrprotect(ptent);
+		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+	} else if (is_swap_pte(ptent)) {
+		ptent = pte_swp_clear_soft_dirty(ptent);
+	} else if (pte_file(ptent)) {
+		ptent = pte_file_clear_soft_dirty(ptent);
+	}
+
 	set_pte_at(vma->vm_mm, addr, pte, ptent);
 #endif
 }
@@ -752,14 +760,15 @@
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
-		if (!pte_present(ptent))
-			continue;
 
 		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
 			clear_soft_dirty(vma, addr, pte);
 			continue;
 		}
 
+		if (!pte_present(ptent))
+			continue;
+
 		page = vm_normal_page(vma, addr, ptent);
 		if (!page)
 			continue;
@@ -859,7 +868,7 @@
 } pagemap_entry_t;
 
 struct pagemapread {
-	int pos, len;
+	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
 	pagemap_entry_t *buffer;
 	bool v2;
 };
@@ -867,7 +876,7 @@
 #define PAGEMAP_WALK_SIZE	(PMD_SIZE)
 #define PAGEMAP_WALK_MASK	(PMD_MASK)
 
-#define PM_ENTRY_BYTES      sizeof(u64)
+#define PM_ENTRY_BYTES      sizeof(pagemap_entry_t)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
 #define PM_STATUS_MASK      (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -930,8 +939,10 @@
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
 	} else if (is_swap_pte(pte)) {
-		swp_entry_t entry = pte_to_swp_entry(pte);
-
+		swp_entry_t entry;
+		if (pte_swp_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
+		entry = pte_to_swp_entry(pte);
 		frame = swp_type(entry) |
 			(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
 		flags = PM_SWAP;
@@ -1116,8 +1127,8 @@
 		goto out_task;
 
 	pm.v2 = soft_dirty_cleared;
-	pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
-	pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
+	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+	pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
 	ret = -ENOMEM;
 	if (!pm.buffer)
 		goto out_task;
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 33532f7..a958444 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -19,12 +19,13 @@
 /*
  * LOCKING:
  *
- * We rely on new Alexander Viro's super-block locking.
+ * These guys are evicted from procfs as the very first step in ->kill_sb().
  *
  */
 
-static int show_version(struct seq_file *m, struct super_block *sb)
+static int show_version(struct seq_file *m, void *unused)
 {
+	struct super_block *sb = m->private;
 	char *format;
 
 	if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) {
@@ -66,8 +67,9 @@
 #define DJP( x ) le32_to_cpu( jp -> x )
 #define JF( x ) ( r -> s_journal -> x )
 
-static int show_super(struct seq_file *m, struct super_block *sb)
+static int show_super(struct seq_file *m, void *unused)
 {
+	struct super_block *sb = m->private;
 	struct reiserfs_sb_info *r = REISERFS_SB(sb);
 
 	seq_printf(m, "state: \t%s\n"
@@ -128,8 +130,9 @@
 	return 0;
 }
 
-static int show_per_level(struct seq_file *m, struct super_block *sb)
+static int show_per_level(struct seq_file *m, void *unused)
 {
+	struct super_block *sb = m->private;
 	struct reiserfs_sb_info *r = REISERFS_SB(sb);
 	int level;
 
@@ -186,8 +189,9 @@
 	return 0;
 }
 
-static int show_bitmap(struct seq_file *m, struct super_block *sb)
+static int show_bitmap(struct seq_file *m, void *unused)
 {
+	struct super_block *sb = m->private;
 	struct reiserfs_sb_info *r = REISERFS_SB(sb);
 
 	seq_printf(m, "free_block: %lu\n"
@@ -218,8 +222,9 @@
 	return 0;
 }
 
-static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
+static int show_on_disk_super(struct seq_file *m, void *unused)
 {
+	struct super_block *sb = m->private;
 	struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
 	struct reiserfs_super_block *rs = sb_info->s_rs;
 	int hash_code = DFL(s_hash_function_code);
@@ -261,8 +266,9 @@
 	return 0;
 }
 
-static int show_oidmap(struct seq_file *m, struct super_block *sb)
+static int show_oidmap(struct seq_file *m, void *unused)
 {
+	struct super_block *sb = m->private;
 	struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
 	struct reiserfs_super_block *rs = sb_info->s_rs;
 	unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize);
@@ -291,8 +297,9 @@
 	return 0;
 }
 
-static int show_journal(struct seq_file *m, struct super_block *sb)
+static int show_journal(struct seq_file *m, void *unused)
 {
+	struct super_block *sb = m->private;
 	struct reiserfs_sb_info *r = REISERFS_SB(sb);
 	struct reiserfs_super_block *rs = r->s_rs;
 	struct journal_params *jp = &rs->s_v1.s_journal;
@@ -383,92 +390,24 @@
 	return 0;
 }
 
-/* iterator */
-static int test_sb(struct super_block *sb, void *data)
-{
-	return data == sb;
-}
-
-static int set_sb(struct super_block *sb, void *data)
-{
-	return -ENOENT;
-}
-
-struct reiserfs_seq_private {
-	struct super_block *sb;
-	int (*show) (struct seq_file *, struct super_block *);
-};
-
-static void *r_start(struct seq_file *m, loff_t * pos)
-{
-	struct reiserfs_seq_private *priv = m->private;
-	loff_t l = *pos;
-
-	if (l)
-		return NULL;
-
-	if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb)))
-		return NULL;
-
-	up_write(&priv->sb->s_umount);
-	return priv->sb;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t * pos)
-{
-	++*pos;
-	if (v)
-		deactivate_super(v);
-	return NULL;
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
-	if (v)
-		deactivate_super(v);
-}
-
-static int r_show(struct seq_file *m, void *v)
-{
-	struct reiserfs_seq_private *priv = m->private;
-	return priv->show(m, v);
-}
-
-static const struct seq_operations r_ops = {
-	.start = r_start,
-	.next = r_next,
-	.stop = r_stop,
-	.show = r_show,
-};
-
 static int r_open(struct inode *inode, struct file *file)
 {
-	struct reiserfs_seq_private *priv;
-	int ret = seq_open_private(file, &r_ops,
-				   sizeof(struct reiserfs_seq_private));
-
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		priv = m->private;
-		priv->sb = proc_get_parent_data(inode);
-		priv->show = PDE_DATA(inode);
-	}
-	return ret;
+	return single_open(file, PDE_DATA(inode), 
+				proc_get_parent_data(inode));
 }
 
 static const struct file_operations r_file_operations = {
 	.open = r_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = seq_release_private,
-	.owner = THIS_MODULE,
+	.release = single_release,
 };
 
 static struct proc_dir_entry *proc_info_root = NULL;
 static const char proc_info_root_name[] = "fs/reiserfs";
 
 static void add_file(struct super_block *sb, char *name,
-		     int (*func) (struct seq_file *, struct super_block *))
+		     int (*func) (struct seq_file *, void *))
 {
 	proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
 			 &r_file_operations, func);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f8a23c3..e2e202a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -499,6 +499,7 @@
 static void reiserfs_kill_sb(struct super_block *s)
 {
 	if (REISERFS_SB(s)) {
+		reiserfs_proc_info_done(s);
 		/*
 		 * Force any pending inode evictions to occur now. Any
 		 * inodes to be removed that have extended attributes
@@ -554,8 +555,6 @@
 				 REISERFS_SB(s)->reserved_blocks);
 	}
 
-	reiserfs_proc_info_done(s);
-
 	reiserfs_write_unlock(s);
 	mutex_destroy(&REISERFS_SB(s)->lock);
 	kfree(s->s_fs_info);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 07d735a..e5869b5 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -39,6 +39,9 @@
  * There is a very similar struct icdinode in xfs_inode which matches the
  * layout of the first 96 bytes of this structure, but is kept in native
  * format instead of big endian.
+ *
+ * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
+ * padding field for v3 inodes.
  */
 typedef struct xfs_dinode {
 	__be16		di_magic;	/* inode magic # = XFS_DINODE_MAGIC */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b78481f..bb262c2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -896,7 +896,6 @@
 	to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
 	to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
 	memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-	to->di_flushiter = cpu_to_be16(from->di_flushiter);
 	to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
 	to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
 	to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
@@ -924,6 +923,9 @@
 		to->di_lsn = cpu_to_be64(from->di_lsn);
 		memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
 		uuid_copy(&to->di_uuid, &from->di_uuid);
+		to->di_flushiter = 0;
+	} else {
+		to->di_flushiter = cpu_to_be16(from->di_flushiter);
 	}
 }
 
@@ -1029,10 +1031,14 @@
 /*
  * Read the disk inode attributes into the in-core inode structure.
  *
- * If we are initialising a new inode and we are not utilising the
- * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core
- * with a random generation number. If we are keeping inodes around, we need to
- * read the inode cluster to get the existing generation number off disk.
+ * For version 5 superblocks, if we are initialising a new inode and we are not
+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new
+ * inode core with a random generation number. If we are keeping inodes around,
+ * we need to read the inode cluster to get the existing generation number off
+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
+ * format) then log recovery is dependent on the di_flushiter field being
+ * initialised from the current on-disk value and hence we must also read the
+ * inode off disk.
  */
 int
 xfs_iread(
@@ -1054,6 +1060,7 @@
 
 	/* shortcut IO on inode allocation if possible */
 	if ((iget_flags & XFS_IGET_CREATE) &&
+	    xfs_sb_version_hascrc(&mp->m_sb) &&
 	    !(mp->m_flags & XFS_MOUNT_IKEEP)) {
 		/* initialise the on-disk inode core */
 		memset(&ip->i_d, 0, sizeof(ip->i_d));
@@ -2882,12 +2889,18 @@
 			__func__, ip->i_ino, ip->i_d.di_forkoff, ip);
 		goto corrupt_out;
 	}
+
 	/*
-	 * bump the flush iteration count, used to detect flushes which
-	 * postdate a log record during recovery. This is redundant as we now
-	 * log every change and hence this can't happen. Still, it doesn't hurt.
+	 * Inode item log recovery for v1/v2 inodes are dependent on the
+	 * di_flushiter count for correct sequencing. We bump the flush
+	 * iteration count so we can detect flushes which postdate a log record
+	 * during recovery. This is redundant as we now log every change and
+	 * hence this can't happen but we need to still do it to ensure
+	 * backwards compatibility with old kernels that predate logging all
+	 * inode changes.
 	 */
-	ip->i_d.di_flushiter++;
+	if (ip->i_d.di_version < 3)
+		ip->i_d.di_flushiter++;
 
 	/*
 	 * Copy the dirty parts of the inode into the on-disk
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6fcc910a..7681b19 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2592,8 +2592,16 @@
 		goto error;
 	}
 
-	/* Skip replay when the on disk inode is newer than the log one */
-	if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+	/*
+	 * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
+	 * are transactional and if ordering is necessary we can determine that
+	 * more accurately by the LSN field in the V3 inode core. Don't trust
+	 * the inode versions we might be changing them here - use the
+	 * superblock flag to determine whether we need to look at di_flushiter
+	 * to skip replay when the on disk inode is newer than the log one
+	 */
+	if (!xfs_sb_version_hascrc(&mp->m_sb) &&
+	    dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
 		/*
 		 * Deal with the wrap case, DI_MAX_FLUSH is less
 		 * than smaller numbers
@@ -2608,6 +2616,7 @@
 			goto error;
 		}
 	}
+
 	/* Take the opportunity to reset the flush iteration count */
 	dicp->di_flushiter = 0;
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 56e6b68..94383a7 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -274,15 +274,12 @@
 };
 
 struct acpi_device_physical_node {
-	u8 node_id;
+	unsigned int node_id;
 	struct list_head node;
 	struct device *dev;
 	bool put_online:1;
 };
 
-/* set maximum of physical nodes to 32 for expansibility */
-#define ACPI_MAX_PHYSICAL_NODE	32
-
 /* Device */
 struct acpi_device {
 	int device_type;
@@ -302,10 +299,9 @@
 	struct acpi_driver *driver;
 	void *driver_data;
 	struct device dev;
-	u8 physical_node_count;
+	unsigned int physical_node_count;
 	struct list_head physical_node_list;
 	struct mutex physical_node_lock;
-	DECLARE_BITMAP(physical_node_id_bitmap, ACPI_MAX_PHYSICAL_NODE);
 	struct list_head power_dependent;
 	void (*remove)(struct acpi_device *);
 };
@@ -445,7 +441,11 @@
 };
 
 /* helper */
-acpi_handle acpi_get_child(acpi_handle, u64);
+acpi_handle acpi_find_child(acpi_handle, u64, bool);
+static inline acpi_handle acpi_get_child(acpi_handle handle, u64 addr)
+{
+	return acpi_find_child(handle, addr, false);
+}
 int acpi_is_root_bridge(acpi_handle);
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle);
 #define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev))
diff --git a/include/acpi/video.h b/include/acpi/video.h
index b26dc4f..61109f2 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -17,21 +17,12 @@
 #define ACPI_VIDEO_DISPLAY_LEGACY_TV      0x0200
 
 #if (defined CONFIG_ACPI_VIDEO || defined CONFIG_ACPI_VIDEO_MODULE)
-extern int __acpi_video_register(bool backlight_quirks);
-static inline int acpi_video_register(void)
-{
-	return __acpi_video_register(false);
-}
-static inline int acpi_video_register_with_quirks(void)
-{
-	return __acpi_video_register(true);
-}
+extern int acpi_video_register(void);
 extern void acpi_video_unregister(void);
 extern int acpi_video_get_edid(struct acpi_device *device, int type,
 			       int device_id, void **edid);
 #else
 static inline int acpi_video_register(void) { return 0; }
-static inline int acpi_video_register_with_quirks(void) { return 0; }
 static inline void acpi_video_unregister(void) { return; }
 static inline int acpi_video_get_edid(struct acpi_device *device, int type,
 				      int device_id, void **edid)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 2f47ade..0807ddf 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -417,6 +417,36 @@
 {
 	return pmd;
 }
+
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return pte;
+}
+
+static inline int pte_swp_soft_dirty(pte_t pte)
+{
+	return 0;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
+{
+       return pte;
+}
+
+static inline pte_t pte_file_mksoft_dirty(pte_t pte)
+{
+       return pte;
+}
+
+static inline int pte_file_soft_dirty(pte_t pte)
+{
+       return 0;
+}
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 13821c3..5672d7e 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -112,7 +112,7 @@
 
 #define HAVE_GENERIC_MMU_GATHER
 
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end);
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
 							unsigned long end);
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 12083dc..2907341 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -45,7 +45,6 @@
 #include <linux/kernel.h>
 #include <linux/miscdevice.h>
 #include <linux/fs.h>
-#include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <linux/file.h>
 #include <linux/platform_device.h>
@@ -62,20 +61,18 @@
 #endif
 #include <asm/mman.h>
 #include <asm/uaccess.h>
-#if defined(CONFIG_AGP) || defined(CONFIG_AGP_MODULE)
 #include <linux/types.h>
 #include <linux/agp_backend.h>
-#endif
 #include <linux/workqueue.h>
 #include <linux/poll.h>
 #include <asm/pgalloc.h>
 #include <drm/drm.h>
 #include <drm/drm_sarea.h>
+#include <drm/drm_vma_manager.h>
 
 #include <linux/idr.h>
 
 #define __OS_HAS_AGP (defined(CONFIG_AGP) || (defined(CONFIG_AGP_MODULE) && defined(MODULE)))
-#define __OS_HAS_MTRR (defined(CONFIG_MTRR))
 
 struct module;
 
@@ -140,19 +137,15 @@
 /* driver capabilities and requirements mask */
 #define DRIVER_USE_AGP     0x1
 #define DRIVER_REQUIRE_AGP 0x2
-#define DRIVER_USE_MTRR    0x4
 #define DRIVER_PCI_DMA     0x8
 #define DRIVER_SG          0x10
 #define DRIVER_HAVE_DMA    0x20
 #define DRIVER_HAVE_IRQ    0x40
 #define DRIVER_IRQ_SHARED  0x80
-#define DRIVER_IRQ_VBL     0x100
-#define DRIVER_DMA_QUEUE   0x200
-#define DRIVER_FB_DMA      0x400
-#define DRIVER_IRQ_VBL2    0x800
 #define DRIVER_GEM         0x1000
 #define DRIVER_MODESET     0x2000
 #define DRIVER_PRIME       0x4000
+#define DRIVER_RENDER      0x8000
 
 #define DRIVER_BUS_PCI 0x1
 #define DRIVER_BUS_PLATFORM 0x2
@@ -168,13 +161,7 @@
 #define DRM_MAGIC_HASH_ORDER  4  /**< Size of key hash table. Must be power of 2. */
 #define DRM_KERNEL_CONTEXT    0	 /**< Change drm_resctx if changed */
 #define DRM_RESERVED_CONTEXTS 1	 /**< Change drm_resctx if changed */
-#define DRM_LOOPING_LIMIT     5000000
-#define DRM_TIME_SLICE	      (HZ/20)  /**< Time slice for GLXContexts */
-#define DRM_LOCK_SLICE	      1	/**< Time slice for lock, in jiffies */
 
-#define DRM_FLAG_DEBUG	  0x01
-
-#define DRM_MAX_CTXBITMAP (PAGE_SIZE * 8)
 #define DRM_MAP_HASH_OFFSET 0x10000000
 
 /*@}*/
@@ -263,9 +250,6 @@
 
 #define DRM_ARRAY_SIZE(x) ARRAY_SIZE(x)
 
-#define DRM_LEFTCOUNT(x) (((x)->rp + (x)->count - (x)->wp) % ((x)->count + 1))
-#define DRM_BUFCOUNT(x) ((x)->count - DRM_LEFTCOUNT(x))
-
 #define DRM_IF_VERSION(maj, min) (maj << 16 | min)
 
 /**
@@ -307,6 +291,7 @@
 #define DRM_ROOT_ONLY	0x4
 #define DRM_CONTROL_ALLOW 0x8
 #define DRM_UNLOCKED	0x10
+#define DRM_RENDER_ALLOW 0x20
 
 struct drm_ioctl_desc {
 	unsigned int cmd;
@@ -587,7 +572,6 @@
 	struct drm_local_map *map;	/**< mapping */
 	uint64_t user_token;
 	struct drm_master *master;
-	struct drm_mm_node *file_offset_node;	/**< fake offset */
 };
 
 /**
@@ -622,8 +606,7 @@
  * GEM specific mm private for tracking GEM objects
  */
 struct drm_gem_mm {
-	struct drm_mm offset_manager;	/**< Offset mgmt for buffer objects */
-	struct drm_open_hash offset_hash; /**< User token hash table for maps */
+	struct drm_vma_offset_manager vma_manager;
 };
 
 /**
@@ -634,8 +617,16 @@
 	/** Reference count of this object */
 	struct kref refcount;
 
-	/** Handle count of this object. Each handle also holds a reference */
-	atomic_t handle_count; /* number of handles on this object */
+	/**
+	 * handle_count - gem file_priv handle count of this object
+	 *
+	 * Each handle also holds a reference. Note that when the handle_count
+	 * drops to 0 any global names (e.g. the id in the flink namespace) will
+	 * be cleared.
+	 *
+	 * Protected by dev->object_name_lock.
+	 * */
+	unsigned handle_count;
 
 	/** Related drm device */
 	struct drm_device *dev;
@@ -644,7 +635,7 @@
 	struct file *filp;
 
 	/* Mapping info for this object */
-	struct drm_map_list map_list;
+	struct drm_vma_offset_node vma_node;
 
 	/**
 	 * Size of the object, in bytes.  Immutable over the object's
@@ -678,10 +669,32 @@
 
 	void *driver_private;
 
-	/* dma buf exported from this GEM object */
-	struct dma_buf *export_dma_buf;
+	/**
+	 * dma_buf - dma buf associated with this GEM object
+	 *
+	 * Pointer to the dma-buf associated with this gem object (either
+	 * through importing or exporting). We break the resulting reference
+	 * loop when the last gem handle for this object is released.
+	 *
+	 * Protected by obj->object_name_lock
+	 */
+	struct dma_buf *dma_buf;
 
-	/* dma buf attachment backing this object */
+	/**
+	 * import_attach - dma buf attachment backing this object
+	 *
+	 * Any foreign dma_buf imported as a gem object has this set to the
+	 * attachment point for the device. This is invariant over the lifetime
+	 * of a gem object.
+	 *
+	 * The driver's ->gem_free_object callback is responsible for cleaning
+	 * up the dma_buf attachment and references acquired at import time.
+	 *
+	 * Note that the drm gem/prime core does not depend upon drivers setting
+	 * this field any more. So for drivers where this doesn't make sense
+	 * (e.g. virtual devices or a displaylink behind an usb bus) they can
+	 * simply leave it as NULL.
+	 */
 	struct dma_buf_attachment *import_attach;
 };
 
@@ -737,6 +750,7 @@
 	int (*irq_by_busid)(struct drm_device *dev, struct drm_irq_busid *p);
 	/* hooks that are for PCI */
 	int (*agp_init)(struct drm_device *dev);
+	void (*agp_destroy)(struct drm_device *dev);
 
 };
 
@@ -885,8 +899,6 @@
 	void (*irq_preinstall) (struct drm_device *dev);
 	int (*irq_postinstall) (struct drm_device *dev);
 	void (*irq_uninstall) (struct drm_device *dev);
-	void (*set_version) (struct drm_device *dev,
-			     struct drm_set_version *sv);
 
 	/* Master routines */
 	int (*master_create)(struct drm_device *dev, struct drm_master *master);
@@ -966,7 +978,7 @@
 
 	u32 driver_features;
 	int dev_priv_size;
-	struct drm_ioctl_desc *ioctls;
+	const struct drm_ioctl_desc *ioctls;
 	int num_ioctls;
 	const struct file_operations *fops;
 	union {
@@ -1037,8 +1049,6 @@
 	struct device kdev;		/**< Linux device */
 	struct drm_device *dev;
 
-	struct proc_dir_entry *proc_root;  /**< proc directory entry */
-	struct drm_info_node proc_nodes;
 	struct dentry *debugfs_root;
 
 	struct list_head debugfs_list;
@@ -1131,12 +1141,7 @@
 	/*@{ */
 	int irq_enabled;		/**< True if irq handler is enabled */
 	__volatile__ long context_flag;	/**< Context swapping flag */
-	__volatile__ long interrupt_flag; /**< Interruption handler flag */
-	__volatile__ long dma_flag;	/**< DMA dispatch flag */
-	wait_queue_head_t context_wait;	/**< Processes waiting on ctx switch */
-	int last_checked;		/**< Last context checked for DMA */
 	int last_context;		/**< Last current context */
-	unsigned long last_switch;	/**< jiffies at last context switch */
 	/*@} */
 
 	struct work_struct work;
@@ -1174,12 +1179,6 @@
 	spinlock_t event_lock;
 
 	/*@} */
-	cycles_t ctx_start;
-	cycles_t lck_start;
-
-	struct fasync_struct *buf_async;/**< Processes waiting for SIGIO */
-	wait_queue_head_t buf_readers;	/**< Processes waiting to read */
-	wait_queue_head_t buf_writers;	/**< Processes waiting to ctx switch */
 
 	struct drm_agp_head *agp;	/**< AGP data */
 
@@ -1207,12 +1206,13 @@
 	unsigned int agp_buffer_token;
 	struct drm_minor *control;		/**< Control node for card */
 	struct drm_minor *primary;		/**< render type primary screen head */
+	struct drm_minor *render;		/**< render node for card */
 
         struct drm_mode_config mode_config;	/**< Current mode config */
 
 	/** \name GEM information */
 	/*@{ */
-	spinlock_t object_name_lock;
+	struct mutex object_name_lock;
 	struct idr object_name_idr;
 	/*@} */
 	int switch_power_state;
@@ -1223,6 +1223,7 @@
 #define DRM_SWITCH_POWER_ON 0
 #define DRM_SWITCH_POWER_OFF 1
 #define DRM_SWITCH_POWER_CHANGING 2
+#define DRM_SWITCH_POWER_DYNAMIC_OFF 3
 
 static __inline__ int drm_core_check_feature(struct drm_device *dev,
 					     int feature)
@@ -1235,25 +1236,6 @@
 	return dev->driver->bus->get_irq(dev);
 }
 
-
-#if __OS_HAS_AGP
-static inline int drm_core_has_AGP(struct drm_device *dev)
-{
-	return drm_core_check_feature(dev, DRIVER_USE_AGP);
-}
-#else
-#define drm_core_has_AGP(dev) (0)
-#endif
-
-#if __OS_HAS_MTRR
-static inline int drm_core_has_MTRR(struct drm_device *dev)
-{
-	return drm_core_check_feature(dev, DRIVER_USE_MTRR);
-}
-#else
-#define drm_core_has_MTRR(dev) (0)
-#endif
-
 static inline void drm_device_set_unplugged(struct drm_device *dev)
 {
 	smp_wmb();
@@ -1272,6 +1254,11 @@
 	return mutex_is_locked(&dev->mode_config.mutex);
 }
 
+static inline bool drm_is_render_client(struct drm_file *file_priv)
+{
+	return file_priv->minor->type == DRM_MINOR_RENDER;
+}
+
 /******************************************************************/
 /** \name Internal function definitions */
 /*@{*/
@@ -1287,7 +1274,6 @@
 extern struct mutex drm_global_mutex;
 extern int drm_open(struct inode *inode, struct file *filp);
 extern int drm_stub_open(struct inode *inode, struct file *filp);
-extern int drm_fasync(int fd, struct file *filp, int on);
 extern ssize_t drm_read(struct file *filp, char __user *buffer,
 			size_t count, loff_t *offset);
 extern int drm_release(struct inode *inode, struct file *filp);
@@ -1301,14 +1287,6 @@
 
 				/* Memory management support (drm_memory.h) */
 #include <drm/drm_memory.h>
-extern void drm_free_agp(DRM_AGP_MEM * handle, int pages);
-extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start);
-extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev,
-				       struct page **pages,
-				       unsigned long num_pages,
-				       uint32_t gtt_offset,
-				       uint32_t type);
-extern int drm_unbind_agp(DRM_AGP_MEM * handle);
 
 				/* Misc. IOCTL support (drm_ioctl.h) */
 extern int drm_irq_by_busid(struct drm_device *dev, void *data,
@@ -1335,8 +1313,6 @@
 		      struct drm_file *file_priv);
 extern int drm_addctx(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
-extern int drm_modctx(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv);
 extern int drm_getctx(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
 extern int drm_switchctx(struct drm_device *dev, void *data,
@@ -1346,9 +1322,10 @@
 extern int drm_rmctx(struct drm_device *dev, void *data,
 		     struct drm_file *file_priv);
 
-extern int drm_ctxbitmap_init(struct drm_device *dev);
-extern void drm_ctxbitmap_cleanup(struct drm_device *dev);
-extern void drm_ctxbitmap_free(struct drm_device *dev, int ctx_handle);
+extern void drm_legacy_ctxbitmap_init(struct drm_device *dev);
+extern void drm_legacy_ctxbitmap_cleanup(struct drm_device *dev);
+extern void drm_legacy_ctxbitmap_release(struct drm_device *dev,
+					 struct drm_file *file_priv);
 
 extern int drm_setsareactx(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv);
@@ -1405,11 +1382,12 @@
 			struct drm_file *file_priv);
 extern int drm_mapbufs(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
-extern int drm_order(unsigned long size);
+extern int drm_dma_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
 
 				/* DMA support (drm_dma.h) */
-extern int drm_dma_setup(struct drm_device *dev);
-extern void drm_dma_takedown(struct drm_device *dev);
+extern int drm_legacy_dma_setup(struct drm_device *dev);
+extern void drm_legacy_dma_takedown(struct drm_device *dev);
 extern void drm_free_buffer(struct drm_device *dev, struct drm_buf * buf);
 extern void drm_core_reclaim_buffers(struct drm_device *dev,
 				     struct drm_file *filp);
@@ -1423,7 +1401,6 @@
 extern int drm_vblank_init(struct drm_device *dev, int num_crtcs);
 extern int drm_wait_vblank(struct drm_device *dev, void *data,
 			   struct drm_file *filp);
-extern int drm_vblank_wait(struct drm_device *dev, unsigned int *vbl_seq);
 extern u32 drm_vblank_count(struct drm_device *dev, int crtc);
 extern u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc,
 				     struct timeval *vblanktime);
@@ -1465,31 +1442,8 @@
 			   struct drm_file *file_priv);
 
 				/* AGP/GART support (drm_agpsupport.h) */
-extern struct drm_agp_head *drm_agp_init(struct drm_device *dev);
-extern int drm_agp_acquire(struct drm_device *dev);
-extern int drm_agp_acquire_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file_priv);
-extern int drm_agp_release(struct drm_device *dev);
-extern int drm_agp_release_ioctl(struct drm_device *dev, void *data,
-				 struct drm_file *file_priv);
-extern int drm_agp_enable(struct drm_device *dev, struct drm_agp_mode mode);
-extern int drm_agp_enable_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
-extern int drm_agp_info(struct drm_device *dev, struct drm_agp_info *info);
-extern int drm_agp_info_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-extern int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request);
-extern int drm_agp_alloc_ioctl(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv);
-extern int drm_agp_free(struct drm_device *dev, struct drm_agp_buffer *request);
-extern int drm_agp_free_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
-extern int drm_agp_unbind(struct drm_device *dev, struct drm_agp_binding *request);
-extern int drm_agp_unbind_ioctl(struct drm_device *dev, void *data,
-			  struct drm_file *file_priv);
-extern int drm_agp_bind(struct drm_device *dev, struct drm_agp_binding *request);
-extern int drm_agp_bind_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv);
+
+#include <drm/drm_agpsupport.h>
 
 				/* Stub support (drm_stub.h) */
 extern int drm_setmaster_ioctl(struct drm_device *dev, void *data,
@@ -1504,23 +1458,19 @@
 extern int drm_put_minor(struct drm_minor **minor);
 extern void drm_unplug_dev(struct drm_device *dev);
 extern unsigned int drm_debug;
+extern unsigned int drm_rnodes;
 
 extern unsigned int drm_vblank_offdelay;
 extern unsigned int drm_timestamp_precision;
 extern unsigned int drm_timestamp_monotonic;
 
 extern struct class *drm_class;
-extern struct proc_dir_entry *drm_proc_root;
 extern struct dentry *drm_debugfs_root;
 
 extern struct idr drm_minors_idr;
 
 extern struct drm_local_map *drm_getsarea(struct drm_device *dev);
 
-				/* Proc support (drm_proc.h) */
-extern int drm_proc_init(struct drm_minor *minor, struct proc_dir_entry *root);
-extern int drm_proc_cleanup(struct drm_minor *minor, struct proc_dir_entry *root);
-
 				/* Debugfs support */
 #if defined(CONFIG_DEBUG_FS)
 extern int drm_debugfs_init(struct drm_minor *minor, int minor_id,
@@ -1550,6 +1500,7 @@
 		struct dma_buf *dma_buf);
 extern int drm_gem_prime_fd_to_handle(struct drm_device *dev,
 		struct drm_file *file_priv, int prime_fd, uint32_t *handle);
+extern void drm_gem_dmabuf_release(struct dma_buf *dma_buf);
 
 extern int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
 					struct drm_file *file_priv);
@@ -1561,25 +1512,22 @@
 extern struct sg_table *drm_prime_pages_to_sg(struct page **pages, int nr_pages);
 extern void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg);
 
+int drm_gem_dumb_destroy(struct drm_file *file,
+			 struct drm_device *dev,
+			 uint32_t handle);
 
 void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv);
 void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv);
-int drm_prime_lookup_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf, uint32_t *handle);
-void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf);
-
-int drm_prime_add_dma_buf(struct drm_device *dev, struct drm_gem_object *obj);
-int drm_prime_lookup_obj(struct drm_device *dev, struct dma_buf *buf,
-			 struct drm_gem_object **obj);
+void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf);
 
 #if DRM_DEBUG_CODE
 extern int drm_vma_info(struct seq_file *m, void *data);
 #endif
 
 				/* Scatter Gather Support (drm_scatter.h) */
-extern void drm_sg_cleanup(struct drm_sg_mem * entry);
-extern int drm_sg_alloc_ioctl(struct drm_device *dev, void *data,
+extern void drm_legacy_sg_cleanup(struct drm_device *dev);
+extern int drm_sg_alloc(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
-extern int drm_sg_alloc(struct drm_device *dev, struct drm_scatter_gather * request);
 extern int drm_sg_free(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
 
@@ -1613,9 +1561,8 @@
 					    size_t size);
 int drm_gem_object_init(struct drm_device *dev,
 			struct drm_gem_object *obj, size_t size);
-int drm_gem_private_object_init(struct drm_device *dev,
-			struct drm_gem_object *obj, size_t size);
-void drm_gem_object_handle_free(struct drm_gem_object *obj);
+void drm_gem_private_object_init(struct drm_device *dev,
+				 struct drm_gem_object *obj, size_t size);
 void drm_gem_vm_open(struct vm_area_struct *vma);
 void drm_gem_vm_close(struct vm_area_struct *vma);
 int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
@@ -1640,66 +1587,32 @@
 static inline void
 drm_gem_object_unreference_unlocked(struct drm_gem_object *obj)
 {
-	if (obj != NULL) {
+	if (obj && !atomic_add_unless(&obj->refcount.refcount, -1, 1)) {
 		struct drm_device *dev = obj->dev;
+
 		mutex_lock(&dev->struct_mutex);
-		kref_put(&obj->refcount, drm_gem_object_free);
+		if (likely(atomic_dec_and_test(&obj->refcount.refcount)))
+			drm_gem_object_free(&obj->refcount);
 		mutex_unlock(&dev->struct_mutex);
 	}
 }
 
+int drm_gem_handle_create_tail(struct drm_file *file_priv,
+			       struct drm_gem_object *obj,
+			       u32 *handlep);
 int drm_gem_handle_create(struct drm_file *file_priv,
 			  struct drm_gem_object *obj,
 			  u32 *handlep);
 int drm_gem_handle_delete(struct drm_file *filp, u32 handle);
 
-static inline void
-drm_gem_object_handle_reference(struct drm_gem_object *obj)
-{
-	drm_gem_object_reference(obj);
-	atomic_inc(&obj->handle_count);
-}
-
-static inline void
-drm_gem_object_handle_unreference(struct drm_gem_object *obj)
-{
-	if (obj == NULL)
-		return;
-
-	if (atomic_read(&obj->handle_count) == 0)
-		return;
-	/*
-	 * Must bump handle count first as this may be the last
-	 * ref, in which case the object would disappear before we
-	 * checked for a name
-	 */
-	if (atomic_dec_and_test(&obj->handle_count))
-		drm_gem_object_handle_free(obj);
-	drm_gem_object_unreference(obj);
-}
-
-static inline void
-drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj)
-{
-	if (obj == NULL)
-		return;
-
-	if (atomic_read(&obj->handle_count) == 0)
-		return;
-
-	/*
-	* Must bump handle count first as this may be the last
-	* ref, in which case the object would disappear before we
-	* checked for a name
-	*/
-
-	if (atomic_dec_and_test(&obj->handle_count))
-		drm_gem_object_handle_free(obj);
-	drm_gem_object_unreference_unlocked(obj);
-}
 
 void drm_gem_free_mmap_offset(struct drm_gem_object *obj);
 int drm_gem_create_mmap_offset(struct drm_gem_object *obj);
+int drm_gem_create_mmap_offset_size(struct drm_gem_object *obj, size_t size);
+
+struct page **drm_gem_get_pages(struct drm_gem_object *obj, gfp_t gfpmask);
+void drm_gem_put_pages(struct drm_gem_object *obj, struct page **pages,
+		bool dirty, bool accessed);
 
 struct drm_gem_object *drm_gem_object_lookup(struct drm_device *dev,
 					     struct drm_file *filp,
@@ -1769,9 +1682,6 @@
 extern int drm_platform_init(struct drm_driver *driver, struct platform_device *platform_device);
 extern void drm_platform_exit(struct drm_driver *driver, struct platform_device *platform_device);
 
-extern int drm_get_platform_dev(struct platform_device *pdev,
-				struct drm_driver *driver);
-
 /* returns true if currently okay to sleep */
 static __inline__ bool drm_can_sleep(void)
 {
diff --git a/include/drm/drm_agpsupport.h b/include/drm/drm_agpsupport.h
new file mode 100644
index 0000000..a184eee
--- /dev/null
+++ b/include/drm/drm_agpsupport.h
@@ -0,0 +1,194 @@
+#ifndef _DRM_AGPSUPPORT_H_
+#define _DRM_AGPSUPPORT_H_
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/agp_backend.h>
+#include <drm/drmP.h>
+
+#if __OS_HAS_AGP
+
+void drm_free_agp(DRM_AGP_MEM * handle, int pages);
+int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start);
+int drm_unbind_agp(DRM_AGP_MEM * handle);
+DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev,
+				struct page **pages,
+				unsigned long num_pages,
+				uint32_t gtt_offset,
+				uint32_t type);
+
+struct drm_agp_head *drm_agp_init(struct drm_device *dev);
+void drm_agp_destroy(struct drm_agp_head *agp);
+void drm_agp_clear(struct drm_device *dev);
+int drm_agp_acquire(struct drm_device *dev);
+int drm_agp_acquire_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv);
+int drm_agp_release(struct drm_device *dev);
+int drm_agp_release_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv);
+int drm_agp_enable(struct drm_device *dev, struct drm_agp_mode mode);
+int drm_agp_enable_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int drm_agp_info(struct drm_device *dev, struct drm_agp_info *info);
+int drm_agp_info_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv);
+int drm_agp_alloc(struct drm_device *dev, struct drm_agp_buffer *request);
+int drm_agp_alloc_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int drm_agp_free(struct drm_device *dev, struct drm_agp_buffer *request);
+int drm_agp_free_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv);
+int drm_agp_unbind(struct drm_device *dev, struct drm_agp_binding *request);
+int drm_agp_unbind_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
+int drm_agp_bind(struct drm_device *dev, struct drm_agp_binding *request);
+int drm_agp_bind_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv);
+
+static inline int drm_core_has_AGP(struct drm_device *dev)
+{
+	return drm_core_check_feature(dev, DRIVER_USE_AGP);
+}
+
+#else /* __OS_HAS_AGP */
+
+static inline void drm_free_agp(DRM_AGP_MEM * handle, int pages)
+{
+}
+
+static inline int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start)
+{
+	return -ENODEV;
+}
+
+static inline int drm_unbind_agp(DRM_AGP_MEM * handle)
+{
+	return -ENODEV;
+}
+
+static inline DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev,
+					      struct page **pages,
+					      unsigned long num_pages,
+					      uint32_t gtt_offset,
+					      uint32_t type)
+{
+	return NULL;
+}
+
+static inline struct drm_agp_head *drm_agp_init(struct drm_device *dev)
+{
+	return NULL;
+}
+
+static inline void drm_agp_destroy(struct drm_agp_head *agp)
+{
+}
+
+static inline void drm_agp_clear(struct drm_device *dev)
+{
+}
+
+static inline int drm_agp_acquire(struct drm_device *dev)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_acquire_ioctl(struct drm_device *dev, void *data,
+					struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_release(struct drm_device *dev)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_release_ioctl(struct drm_device *dev, void *data,
+					struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_enable(struct drm_device *dev,
+				 struct drm_agp_mode mode)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_enable_ioctl(struct drm_device *dev, void *data,
+				       struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_info(struct drm_device *dev,
+			       struct drm_agp_info *info)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_info_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_alloc(struct drm_device *dev,
+				struct drm_agp_buffer *request)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_alloc_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_free(struct drm_device *dev,
+			       struct drm_agp_buffer *request)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_free_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_unbind(struct drm_device *dev,
+				 struct drm_agp_binding *request)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_unbind_ioctl(struct drm_device *dev, void *data,
+				       struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_bind(struct drm_device *dev,
+			       struct drm_agp_binding *request)
+{
+	return -ENODEV;
+}
+
+static inline int drm_agp_bind_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_priv)
+{
+	return -ENODEV;
+}
+
+static inline int drm_core_has_AGP(struct drm_device *dev)
+{
+	return 0;
+}
+
+#endif /* __OS_HAS_AGP */
+
+#endif /* _DRM_AGPSUPPORT_H_ */
diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h
index fa12a2f..24f4995 100644
--- a/include/drm/drm_crtc.h
+++ b/include/drm/drm_crtc.h
@@ -49,6 +49,7 @@
 #define DRM_MODE_OBJECT_FB 0xfbfbfbfb
 #define DRM_MODE_OBJECT_BLOB 0xbbbbbbbb
 #define DRM_MODE_OBJECT_PLANE 0xeeeeeeee
+#define DRM_MODE_OBJECT_BRIDGE 0xbdbdbdbd
 
 struct drm_mode_object {
 	uint32_t id;
@@ -305,6 +306,7 @@
 struct drm_encoder;
 struct drm_pending_vblank_event;
 struct drm_plane;
+struct drm_bridge;
 
 /**
  * drm_crtc_funcs - control CRTCs for a given device
@@ -363,7 +365,8 @@
 	 */
 	int (*page_flip)(struct drm_crtc *crtc,
 			 struct drm_framebuffer *fb,
-			 struct drm_pending_vblank_event *event);
+			 struct drm_pending_vblank_event *event,
+			 uint32_t flags);
 
 	int (*set_property)(struct drm_crtc *crtc,
 			    struct drm_property *property, uint64_t val);
@@ -494,8 +497,6 @@
 	void (*destroy)(struct drm_encoder *encoder);
 };
 
-#define DRM_CONNECTOR_MAX_UMODES 16
-#define DRM_CONNECTOR_LEN 32
 #define DRM_CONNECTOR_MAX_ENCODER 3
 
 /**
@@ -507,6 +508,7 @@
  * @possible_crtcs: bitmask of potential CRTC bindings
  * @possible_clones: bitmask of potential sibling encoders for cloning
  * @crtc: currently bound CRTC
+ * @bridge: bridge associated to the encoder
  * @funcs: control functions
  * @helper_private: mid-layer private data
  *
@@ -523,6 +525,7 @@
 	uint32_t possible_clones;
 
 	struct drm_crtc *crtc;
+	struct drm_bridge *bridge;
 	const struct drm_encoder_funcs *funcs;
 	void *helper_private;
 };
@@ -683,6 +686,48 @@
 };
 
 /**
+ * drm_bridge_funcs - drm_bridge control functions
+ * @mode_fixup: Try to fixup (or reject entirely) proposed mode for this bridge
+ * @disable: Called right before encoder prepare, disables the bridge
+ * @post_disable: Called right after encoder prepare, for lockstepped disable
+ * @mode_set: Set this mode to the bridge
+ * @pre_enable: Called right before encoder commit, for lockstepped commit
+ * @enable: Called right after encoder commit, enables the bridge
+ * @destroy: make object go away
+ */
+struct drm_bridge_funcs {
+	bool (*mode_fixup)(struct drm_bridge *bridge,
+			   const struct drm_display_mode *mode,
+			   struct drm_display_mode *adjusted_mode);
+	void (*disable)(struct drm_bridge *bridge);
+	void (*post_disable)(struct drm_bridge *bridge);
+	void (*mode_set)(struct drm_bridge *bridge,
+			 struct drm_display_mode *mode,
+			 struct drm_display_mode *adjusted_mode);
+	void (*pre_enable)(struct drm_bridge *bridge);
+	void (*enable)(struct drm_bridge *bridge);
+	void (*destroy)(struct drm_bridge *bridge);
+};
+
+/**
+ * drm_bridge - central DRM bridge control structure
+ * @dev: DRM device this bridge belongs to
+ * @head: list management
+ * @base: base mode object
+ * @funcs: control functions
+ * @driver_private: pointer to the bridge driver's internal context
+ */
+struct drm_bridge {
+	struct drm_device *dev;
+	struct list_head head;
+
+	struct drm_mode_object base;
+
+	const struct drm_bridge_funcs *funcs;
+	void *driver_private;
+};
+
+/**
  * drm_mode_set - new values for a CRTC config change
  * @head: list management
  * @fb: framebuffer to use for new config
@@ -742,6 +787,7 @@
 	uint32_t num_crtcs;
 	uint32_t num_encoders;
 	uint32_t num_connectors;
+	uint32_t num_bridges;
 
 	/* list of object IDs for this group */
 	uint32_t *id_list;
@@ -756,6 +802,8 @@
  * @fb_list: list of framebuffers available
  * @num_connector: number of connectors on this device
  * @connector_list: list of connector objects
+ * @num_bridge: number of bridges on this device
+ * @bridge_list: list of bridge objects
  * @num_encoder: number of encoders on this device
  * @encoder_list: list of encoder objects
  * @num_crtc: number of CRTCs on this device
@@ -793,6 +841,8 @@
 
 	int num_connector;
 	struct list_head connector_list;
+	int num_bridge;
+	struct list_head bridge_list;
 	int num_encoder;
 	struct list_head encoder_list;
 	int num_plane;
@@ -839,11 +889,13 @@
 
 	/* Optional properties */
 	struct drm_property *scaling_mode_property;
-	struct drm_property *dithering_mode_property;
 	struct drm_property *dirty_info_property;
 
 	/* dumb ioctl parameters */
 	uint32_t preferred_depth, prefer_shadow;
+
+	/* whether async page flip is supported or not */
+	bool async_page_flip;
 };
 
 #define obj_to_crtc(x) container_of(x, struct drm_crtc, base)
@@ -869,6 +921,8 @@
 			 const struct drm_crtc_funcs *funcs);
 extern void drm_crtc_cleanup(struct drm_crtc *crtc);
 
+extern void drm_connector_ida_init(void);
+extern void drm_connector_ida_destroy(void);
 extern int drm_connector_init(struct drm_device *dev,
 			      struct drm_connector *connector,
 			      const struct drm_connector_funcs *funcs,
@@ -878,6 +932,10 @@
 /* helper to unplug all connectors from sysfs for device */
 extern void drm_connector_unplug_all(struct drm_device *dev);
 
+extern int drm_bridge_init(struct drm_device *dev, struct drm_bridge *bridge,
+			   const struct drm_bridge_funcs *funcs);
+extern void drm_bridge_cleanup(struct drm_bridge *bridge);
+
 extern int drm_encoder_init(struct drm_device *dev,
 			    struct drm_encoder *encoder,
 			    const struct drm_encoder_funcs *funcs,
@@ -908,7 +966,6 @@
 				 struct i2c_adapter *adapter);
 extern int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid);
 extern void drm_mode_probed_add(struct drm_connector *connector, struct drm_display_mode *mode);
-extern void drm_mode_remove(struct drm_connector *connector, struct drm_display_mode *mode);
 extern void drm_mode_copy(struct drm_display_mode *dst, const struct drm_display_mode *src);
 extern struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
 						   const struct drm_display_mode *mode);
@@ -925,14 +982,9 @@
 /* for us by fb module */
 extern struct drm_display_mode *drm_mode_create(struct drm_device *dev);
 extern void drm_mode_destroy(struct drm_device *dev, struct drm_display_mode *mode);
-extern void drm_mode_list_concat(struct list_head *head,
-				 struct list_head *new);
 extern void drm_mode_validate_size(struct drm_device *dev,
 				   struct list_head *mode_list,
 				   int maxX, int maxY, int maxPitch);
-extern void drm_mode_validate_clocks(struct drm_device *dev,
-				     struct list_head *mode_list,
-				     int *min, int *max, int n_ranges);
 extern void drm_mode_prune_invalid(struct drm_device *dev,
 				   struct list_head *mode_list, bool verbose);
 extern void drm_mode_sort(struct list_head *mode_list);
@@ -949,9 +1001,6 @@
 extern int drm_object_property_get_value(struct drm_mode_object *obj,
 					 struct drm_property *property,
 					 uint64_t *value);
-extern struct drm_display_mode *drm_crtc_mode_create(struct drm_device *dev);
-extern void drm_framebuffer_set_object(struct drm_device *dev,
-				       unsigned long handle);
 extern int drm_framebuffer_init(struct drm_device *dev,
 				struct drm_framebuffer *fb,
 				const struct drm_framebuffer_funcs *funcs);
@@ -962,10 +1011,6 @@
 extern void drm_framebuffer_remove(struct drm_framebuffer *fb);
 extern void drm_framebuffer_cleanup(struct drm_framebuffer *fb);
 extern void drm_framebuffer_unregister_private(struct drm_framebuffer *fb);
-extern int drmfb_probe(struct drm_device *dev, struct drm_crtc *crtc);
-extern int drmfb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
-extern void drm_crtc_probe_connector_modes(struct drm_device *dev, int maxX, int maxY);
-extern bool drm_crtc_in_use(struct drm_crtc *crtc);
 
 extern void drm_object_attach_property(struct drm_mode_object *obj,
 				       struct drm_property *property,
@@ -990,7 +1035,6 @@
 extern int drm_mode_create_tv_properties(struct drm_device *dev, int num_formats,
 				     char *formats[]);
 extern int drm_mode_create_scaling_mode_property(struct drm_device *dev);
-extern int drm_mode_create_dithering_property(struct drm_device *dev);
 extern int drm_mode_create_dirty_info_property(struct drm_device *dev);
 extern const char *drm_get_encoder_name(const struct drm_encoder *encoder);
 
@@ -1040,17 +1084,12 @@
 				  void *data, struct drm_file *file_priv);
 extern int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
 					      void *data, struct drm_file *file_priv);
-extern int drm_mode_hotplug_ioctl(struct drm_device *dev,
-				  void *data, struct drm_file *file_priv);
-extern int drm_mode_replacefb(struct drm_device *dev,
-			      void *data, struct drm_file *file_priv);
 extern int drm_mode_getencoder(struct drm_device *dev,
 			       void *data, struct drm_file *file_priv);
 extern int drm_mode_gamma_get_ioctl(struct drm_device *dev,
 				    void *data, struct drm_file *file_priv);
 extern int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 				    void *data, struct drm_file *file_priv);
-extern u8 *drm_find_cea_extension(struct edid *edid);
 extern u8 drm_match_cea_mode(const struct drm_display_mode *to_match);
 extern bool drm_detect_hdmi_monitor(struct edid *edid);
 extern bool drm_detect_monitor_audio(struct edid *edid);
diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index e8e1417..ae8dbfb 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -342,13 +342,42 @@
 u8 drm_dp_get_adjust_request_pre_emphasis(u8 link_status[DP_LINK_STATUS_SIZE],
 					  int lane);
 
-#define DP_RECEIVER_CAP_SIZE	0xf
+#define DP_RECEIVER_CAP_SIZE		0xf
+#define EDP_PSR_RECEIVER_CAP_SIZE	2
+
 void drm_dp_link_train_clock_recovery_delay(u8 dpcd[DP_RECEIVER_CAP_SIZE]);
 void drm_dp_link_train_channel_eq_delay(u8 dpcd[DP_RECEIVER_CAP_SIZE]);
 
 u8 drm_dp_link_rate_to_bw_code(int link_rate);
 int drm_dp_bw_code_to_link_rate(u8 link_bw);
 
+struct edp_sdp_header {
+	u8 HB0; /* Secondary Data Packet ID */
+	u8 HB1; /* Secondary Data Packet Type */
+	u8 HB2; /* 7:5 reserved, 4:0 revision number */
+	u8 HB3; /* 7:5 reserved, 4:0 number of valid data bytes */
+} __packed;
+
+#define EDP_SDP_HEADER_REVISION_MASK		0x1F
+#define EDP_SDP_HEADER_VALID_PAYLOAD_BYTES	0x1F
+
+struct edp_vsc_psr {
+	struct edp_sdp_header sdp_header;
+	u8 DB0; /* Stereo Interface */
+	u8 DB1; /* 0 - PSR State; 1 - Update RFB; 2 - CRC Valid */
+	u8 DB2; /* CRC value bits 7:0 of the R or Cr component */
+	u8 DB3; /* CRC value bits 15:8 of the R or Cr component */
+	u8 DB4; /* CRC value bits 7:0 of the G or Y component */
+	u8 DB5; /* CRC value bits 15:8 of the G or Y component */
+	u8 DB6; /* CRC value bits 7:0 of the B or Cb component */
+	u8 DB7; /* CRC value bits 15:8 of the B or Cb component */
+	u8 DB8_31[24]; /* Reserved */
+} __packed;
+
+#define EDP_VSC_PSR_STATE_ACTIVE	(1<<0)
+#define EDP_VSC_PSR_UPDATE_RFB		(1<<1)
+#define EDP_VSC_PSR_CRC_VALUES_VALID	(1<<2)
+
 static inline int
 drm_dp_max_link_rate(u8 dpcd[DP_RECEIVER_CAP_SIZE])
 {
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index fc481fc..a1441c5 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -256,9 +256,11 @@
 struct drm_connector;
 struct drm_display_mode;
 struct hdmi_avi_infoframe;
+struct hdmi_vendor_infoframe;
 
 void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid);
 int drm_edid_to_sad(struct edid *edid, struct cea_sad **sads);
+int drm_edid_to_speaker_allocation(struct edid *edid, u8 **sadb);
 int drm_av_sync_delay(struct drm_connector *connector,
 		      struct drm_display_mode *mode);
 struct drm_connector *drm_select_eld(struct drm_encoder *encoder,
@@ -268,5 +270,8 @@
 int
 drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
 					 const struct drm_display_mode *mode);
+int
+drm_hdmi_vendor_infoframe_from_display_mode(struct hdmi_vendor_infoframe *frame,
+					    const struct drm_display_mode *mode);
 
 #endif /* __DRM_EDID_H__ */
diff --git a/include/drm/drm_fb_cma_helper.h b/include/drm/drm_fb_cma_helper.h
index 4a3fc24..c54cf3d 100644
--- a/include/drm/drm_fb_cma_helper.h
+++ b/include/drm/drm_fb_cma_helper.h
@@ -24,7 +24,6 @@
 	unsigned int plane);
 
 #ifdef CONFIG_DEBUG_FS
-void drm_fb_cma_describe(struct drm_framebuffer *fb, struct seq_file *m);
 int drm_fb_cma_debugfs_show(struct seq_file *m, void *arg);
 #endif
 
diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h
index f5e1168..d639049 100644
--- a/include/drm/drm_fixed.h
+++ b/include/drm/drm_fixed.h
@@ -84,12 +84,12 @@
 	return ((s64)a) >> DRM_FIXED_POINT;
 }
 
-static inline s64 drm_fixp_msbset(int64_t a)
+static inline unsigned drm_fixp_msbset(int64_t a)
 {
 	unsigned shift, sign = (a >> 63) & 1;
 
 	for (shift = 62; shift > 0; --shift)
-		if ((a >> shift) != sign)
+		if (((a >> shift) & 1) != sign)
 			return shift;
 
 	return 0;
@@ -100,9 +100,9 @@
 	unsigned shift = drm_fixp_msbset(a) + drm_fixp_msbset(b);
 	s64 result;
 
-	if (shift > 63) {
-		shift = shift - 63;
-		a >>= shift >> 1;
+	if (shift > 61) {
+		shift = shift - 61;
+		a >>= (shift >> 1) + (shift & 1);
 		b >>= shift >> 1;
 	} else
 		shift = 0;
@@ -120,7 +120,7 @@
 
 static inline s64 drm_fixp_div(s64 a, s64 b)
 {
-	unsigned shift = 63 - drm_fixp_msbset(a);
+	unsigned shift = 62 - drm_fixp_msbset(a);
 	s64 result;
 
 	a <<= shift;
@@ -154,7 +154,7 @@
 	}
 
 	if (x < 0)
-		sum = drm_fixp_div(1, sum);
+		sum = drm_fixp_div(DRM_FIXED_ONE, sum);
 
 	return sum;
 }
diff --git a/include/drm/drm_flip_work.h b/include/drm/drm_flip_work.h
new file mode 100644
index 0000000..35c776a
--- /dev/null
+++ b/include/drm/drm_flip_work.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef DRM_FLIP_WORK_H
+#define DRM_FLIP_WORK_H
+
+#include <linux/kfifo.h>
+#include <linux/workqueue.h>
+
+/**
+ * DOC: flip utils
+ *
+ * Util to queue up work to run from work-queue context after flip/vblank.
+ * Typically this can be used to defer unref of framebuffer's, cursor
+ * bo's, etc until after vblank.  The APIs are all safe (and lockless)
+ * for up to one producer and once consumer at a time.  The single-consumer
+ * aspect is ensured by committing the queued work to a single work-queue.
+ */
+
+struct drm_flip_work;
+
+/*
+ * drm_flip_func_t - callback function
+ *
+ * @work: the flip work
+ * @val: value queued via drm_flip_work_queue()
+ *
+ * Callback function to be called for each of the  queue'd work items after
+ * drm_flip_work_commit() is called.
+ */
+typedef void (*drm_flip_func_t)(struct drm_flip_work *work, void *val);
+
+/**
+ * struct drm_flip_work - flip work queue
+ * @name: debug name
+ * @pending: number of queued but not committed items
+ * @count: number of committed items
+ * @func: callback fxn called for each committed item
+ * @worker: worker which calls @func
+ */
+struct drm_flip_work {
+	const char *name;
+	atomic_t pending, count;
+	drm_flip_func_t func;
+	struct work_struct worker;
+	DECLARE_KFIFO_PTR(fifo, void *);
+};
+
+void drm_flip_work_queue(struct drm_flip_work *work, void *val);
+void drm_flip_work_commit(struct drm_flip_work *work,
+		struct workqueue_struct *wq);
+int drm_flip_work_init(struct drm_flip_work *work, int size,
+		const char *name, drm_flip_func_t func);
+void drm_flip_work_cleanup(struct drm_flip_work *work);
+
+#endif  /* DRM_FLIP_WORK_H */
diff --git a/include/drm/drm_gem_cma_helper.h b/include/drm/drm_gem_cma_helper.h
index c34f27f..89b4d7d 100644
--- a/include/drm/drm_gem_cma_helper.h
+++ b/include/drm/drm_gem_cma_helper.h
@@ -30,14 +30,6 @@
 /* set vm_flags and we can change the vm attribute to other one at here. */
 int drm_gem_cma_mmap(struct file *filp, struct vm_area_struct *vma);
 
-/*
- * destroy memory region allocated.
- *	- a gem handle and physical memory region pointed by a gem object
- *	would be released by drm_gem_handle_delete().
- */
-int drm_gem_cma_dumb_destroy(struct drm_file *file_priv,
-		struct drm_device *drm, unsigned int handle);
-
 /* allocate physical memory. */
 struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
 		unsigned int size);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 4d06edb..cba6786 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -36,11 +36,19 @@
 /*
  * Generic range manager structs
  */
+#include <linux/bug.h>
+#include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/spinlock.h>
 #ifdef CONFIG_DEBUG_FS
 #include <linux/seq_file.h>
 #endif
 
+enum drm_mm_search_flags {
+	DRM_MM_SEARCH_DEFAULT =		0,
+	DRM_MM_SEARCH_BEST =		1 << 0,
+};
+
 struct drm_mm_node {
 	struct list_head node_list;
 	struct list_head hole_stack;
@@ -62,9 +70,6 @@
 	/* head_node.node_list is the list of all memory nodes, ordered
 	 * according to the (increasing) start address of the memory node. */
 	struct drm_mm_node head_node;
-	struct list_head unused_nodes;
-	int num_unused;
-	spinlock_t unused_lock;
 	unsigned int scan_check_range : 1;
 	unsigned scan_alignment;
 	unsigned long scan_color;
@@ -115,13 +120,6 @@
 #define drm_mm_for_each_node(entry, mm) list_for_each_entry(entry, \
 						&(mm)->head_node.node_list, \
 						node_list)
-#define drm_mm_for_each_scanned_node_reverse(entry, n, mm) \
-	for (entry = (mm)->prev_scanned_node, \
-		next = entry ? list_entry(entry->node_list.next, \
-			struct drm_mm_node, node_list) : NULL; \
-	     entry != NULL; entry = next, \
-		next = entry ? list_entry(entry->node_list.next, \
-			struct drm_mm_node, node_list) : NULL) \
 
 /* Note that we need to unroll list_for_each_entry in order to inline
  * setting hole_start and hole_end on each iteration and keep the
@@ -138,124 +136,50 @@
 /*
  * Basic range manager support (drm_mm.c)
  */
-extern struct drm_mm_node *drm_mm_create_block(struct drm_mm *mm,
-					       unsigned long start,
-					       unsigned long size,
-					       bool atomic);
-extern struct drm_mm_node *drm_mm_get_block_generic(struct drm_mm_node *node,
-						    unsigned long size,
-						    unsigned alignment,
-						    unsigned long color,
-						    int atomic);
-extern struct drm_mm_node *drm_mm_get_block_range_generic(
-						struct drm_mm_node *node,
-						unsigned long size,
-						unsigned alignment,
-						unsigned long color,
-						unsigned long start,
-						unsigned long end,
-						int atomic);
-static inline struct drm_mm_node *drm_mm_get_block(struct drm_mm_node *parent,
-						   unsigned long size,
-						   unsigned alignment)
-{
-	return drm_mm_get_block_generic(parent, size, alignment, 0, 0);
-}
-static inline struct drm_mm_node *drm_mm_get_block_atomic(struct drm_mm_node *parent,
-							  unsigned long size,
-							  unsigned alignment)
-{
-	return drm_mm_get_block_generic(parent, size, alignment, 0, 1);
-}
-static inline struct drm_mm_node *drm_mm_get_block_range(
-						struct drm_mm_node *parent,
-						unsigned long size,
-						unsigned alignment,
-						unsigned long start,
-						unsigned long end)
-{
-	return drm_mm_get_block_range_generic(parent, size, alignment, 0,
-					      start, end, 0);
-}
-static inline struct drm_mm_node *drm_mm_get_block_atomic_range(
-						struct drm_mm_node *parent,
-						unsigned long size,
-						unsigned alignment,
-						unsigned long start,
-						unsigned long end)
-{
-	return drm_mm_get_block_range_generic(parent, size, alignment, 0,
-						start, end, 1);
-}
+extern int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node);
 
-extern int drm_mm_insert_node(struct drm_mm *mm,
-			      struct drm_mm_node *node,
-			      unsigned long size,
-			      unsigned alignment);
-extern int drm_mm_insert_node_in_range(struct drm_mm *mm,
-				       struct drm_mm_node *node,
-				       unsigned long size,
-				       unsigned alignment,
-				       unsigned long start,
-				       unsigned long end);
 extern int drm_mm_insert_node_generic(struct drm_mm *mm,
 				      struct drm_mm_node *node,
 				      unsigned long size,
 				      unsigned alignment,
-				      unsigned long color);
+				      unsigned long color,
+				      enum drm_mm_search_flags flags);
+static inline int drm_mm_insert_node(struct drm_mm *mm,
+				     struct drm_mm_node *node,
+				     unsigned long size,
+				     unsigned alignment,
+				     enum drm_mm_search_flags flags)
+{
+	return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags);
+}
+
 extern int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
 				       struct drm_mm_node *node,
 				       unsigned long size,
 				       unsigned alignment,
 				       unsigned long color,
 				       unsigned long start,
-				       unsigned long end);
-extern void drm_mm_put_block(struct drm_mm_node *cur);
-extern void drm_mm_remove_node(struct drm_mm_node *node);
-extern void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new);
-extern struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
-						      unsigned long size,
-						      unsigned alignment,
-						      unsigned long color,
-						      bool best_match);
-extern struct drm_mm_node *drm_mm_search_free_in_range_generic(
-						const struct drm_mm *mm,
-						unsigned long size,
-						unsigned alignment,
-						unsigned long color,
-						unsigned long start,
-						unsigned long end,
-						bool best_match);
-static inline struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm,
-						     unsigned long size,
-						     unsigned alignment,
-						     bool best_match)
+				       unsigned long end,
+				       enum drm_mm_search_flags flags);
+static inline int drm_mm_insert_node_in_range(struct drm_mm *mm,
+					      struct drm_mm_node *node,
+					      unsigned long size,
+					      unsigned alignment,
+					      unsigned long start,
+					      unsigned long end,
+					      enum drm_mm_search_flags flags)
 {
-	return drm_mm_search_free_generic(mm,size, alignment, 0, best_match);
-}
-static inline  struct drm_mm_node *drm_mm_search_free_in_range(
-						const struct drm_mm *mm,
-						unsigned long size,
-						unsigned alignment,
-						unsigned long start,
-						unsigned long end,
-						bool best_match)
-{
-	return drm_mm_search_free_in_range_generic(mm, size, alignment, 0,
-						   start, end, best_match);
+	return drm_mm_insert_node_in_range_generic(mm, node, size, alignment,
+						   0, start, end, flags);
 }
 
+extern void drm_mm_remove_node(struct drm_mm_node *node);
+extern void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new);
 extern void drm_mm_init(struct drm_mm *mm,
 			unsigned long start,
 			unsigned long size);
 extern void drm_mm_takedown(struct drm_mm *mm);
 extern int drm_mm_clean(struct drm_mm *mm);
-extern int drm_mm_pre_get(struct drm_mm *mm);
-
-static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block)
-{
-	return block->mm;
-}
 
 void drm_mm_init_scan(struct drm_mm *mm,
 		      unsigned long size,
diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h
index 34efaf6..fd54a14 100644
--- a/include/drm/drm_pciids.h
+++ b/include/drm/drm_pciids.h
@@ -1,4 +1,22 @@
 #define radeon_PCI_IDS \
+	{0x1002, 0x1304, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1305, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1306, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1307, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1309, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x130A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x130B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x130C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x130D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x130E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x130F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1311, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1313, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1315, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x1316, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x131B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
+	{0x1002, 0x131C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_KAVERI|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \
 	{0x1002, 0x3150, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \
 	{0x1002, 0x3151, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
 	{0x1002, 0x3152, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \
@@ -690,29 +708,6 @@
 	{0x102b, 0x2527, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MGA_CARD_TYPE_G550}, \
 	{0, 0, 0}
 
-#define mach64_PCI_IDS \
-	{0x1002, 0x4749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4751, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4742, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4744, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c49, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c50, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c51, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c42, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c44, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x474c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x474f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4752, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4753, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x474d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x474e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c52, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c53, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c4d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0x1002, 0x4c4e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0, 0, 0}
-
 #define sisdrv_PCI_IDS \
 	{0x1039, 0x0300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 	{0x1039, 0x5300, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
@@ -752,10 +747,6 @@
 	{0x8086, 0x1132, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
 	{0, 0, 0}
 
-#define gamma_PCI_IDS \
-	{0x3d3d, 0x0008, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \
-	{0, 0, 0}
-
 #define savage_PCI_IDS \
 	{0x5333, 0x8a20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE3D}, \
 	{0x5333, 0x8a21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_SAVAGE3D}, \
@@ -781,6 +772,3 @@
 	{0x5333, 0x8d03, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGEDDR}, \
 	{0x5333, 0x8d04, PCI_ANY_ID, PCI_ANY_ID, 0, 0, S3_PROSAVAGEDDR}, \
 	{0, 0, 0}
-
-#define ffb_PCI_IDS \
-	{0, 0, 0}
diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h
new file mode 100644
index 0000000..c18a593
--- /dev/null
+++ b/include/drm/drm_vma_manager.h
@@ -0,0 +1,257 @@
+#ifndef __DRM_VMA_MANAGER_H__
+#define __DRM_VMA_MANAGER_H__
+
+/*
+ * Copyright (c) 2013 David Herrmann <dh.herrmann@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drm_mm.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+struct drm_vma_offset_file {
+	struct rb_node vm_rb;
+	struct file *vm_filp;
+	unsigned long vm_count;
+};
+
+struct drm_vma_offset_node {
+	rwlock_t vm_lock;
+	struct drm_mm_node vm_node;
+	struct rb_node vm_rb;
+	struct rb_root vm_files;
+};
+
+struct drm_vma_offset_manager {
+	rwlock_t vm_lock;
+	struct rb_root vm_addr_space_rb;
+	struct drm_mm vm_addr_space_mm;
+};
+
+void drm_vma_offset_manager_init(struct drm_vma_offset_manager *mgr,
+				 unsigned long page_offset, unsigned long size);
+void drm_vma_offset_manager_destroy(struct drm_vma_offset_manager *mgr);
+
+struct drm_vma_offset_node *drm_vma_offset_lookup(struct drm_vma_offset_manager *mgr,
+						  unsigned long start,
+						  unsigned long pages);
+struct drm_vma_offset_node *drm_vma_offset_lookup_locked(struct drm_vma_offset_manager *mgr,
+							   unsigned long start,
+							   unsigned long pages);
+int drm_vma_offset_add(struct drm_vma_offset_manager *mgr,
+		       struct drm_vma_offset_node *node, unsigned long pages);
+void drm_vma_offset_remove(struct drm_vma_offset_manager *mgr,
+			   struct drm_vma_offset_node *node);
+
+int drm_vma_node_allow(struct drm_vma_offset_node *node, struct file *filp);
+void drm_vma_node_revoke(struct drm_vma_offset_node *node, struct file *filp);
+bool drm_vma_node_is_allowed(struct drm_vma_offset_node *node,
+			     struct file *filp);
+
+/**
+ * drm_vma_offset_exact_lookup() - Look up node by exact address
+ * @mgr: Manager object
+ * @start: Start address (page-based, not byte-based)
+ * @pages: Size of object (page-based)
+ *
+ * Same as drm_vma_offset_lookup() but does not allow any offset into the node.
+ * It only returns the exact object with the given start address.
+ *
+ * RETURNS:
+ * Node at exact start address @start.
+ */
+static inline struct drm_vma_offset_node *
+drm_vma_offset_exact_lookup(struct drm_vma_offset_manager *mgr,
+			    unsigned long start,
+			    unsigned long pages)
+{
+	struct drm_vma_offset_node *node;
+
+	node = drm_vma_offset_lookup(mgr, start, pages);
+	return (node && node->vm_node.start == start) ? node : NULL;
+}
+
+/**
+ * drm_vma_offset_lock_lookup() - Lock lookup for extended private use
+ * @mgr: Manager object
+ *
+ * Lock VMA manager for extended lookups. Only *_locked() VMA function calls
+ * are allowed while holding this lock. All other contexts are blocked from VMA
+ * until the lock is released via drm_vma_offset_unlock_lookup().
+ *
+ * Use this if you need to take a reference to the objects returned by
+ * drm_vma_offset_lookup_locked() before releasing this lock again.
+ *
+ * This lock must not be used for anything else than extended lookups. You must
+ * not call any other VMA helpers while holding this lock.
+ *
+ * Note: You're in atomic-context while holding this lock!
+ *
+ * Example:
+ *   drm_vma_offset_lock_lookup(mgr);
+ *   node = drm_vma_offset_lookup_locked(mgr);
+ *   if (node)
+ *       kref_get_unless_zero(container_of(node, sth, entr));
+ *   drm_vma_offset_unlock_lookup(mgr);
+ */
+static inline void drm_vma_offset_lock_lookup(struct drm_vma_offset_manager *mgr)
+{
+	read_lock(&mgr->vm_lock);
+}
+
+/**
+ * drm_vma_offset_unlock_lookup() - Unlock lookup for extended private use
+ * @mgr: Manager object
+ *
+ * Release lookup-lock. See drm_vma_offset_lock_lookup() for more information.
+ */
+static inline void drm_vma_offset_unlock_lookup(struct drm_vma_offset_manager *mgr)
+{
+	read_unlock(&mgr->vm_lock);
+}
+
+/**
+ * drm_vma_node_reset() - Initialize or reset node object
+ * @node: Node to initialize or reset
+ *
+ * Reset a node to its initial state. This must be called before using it with
+ * any VMA offset manager.
+ *
+ * This must not be called on an already allocated node, or you will leak
+ * memory.
+ */
+static inline void drm_vma_node_reset(struct drm_vma_offset_node *node)
+{
+	memset(node, 0, sizeof(*node));
+	node->vm_files = RB_ROOT;
+	rwlock_init(&node->vm_lock);
+}
+
+/**
+ * drm_vma_node_start() - Return start address for page-based addressing
+ * @node: Node to inspect
+ *
+ * Return the start address of the given node. This can be used as offset into
+ * the linear VM space that is provided by the VMA offset manager. Note that
+ * this can only be used for page-based addressing. If you need a proper offset
+ * for user-space mappings, you must apply "<< PAGE_SHIFT" or use the
+ * drm_vma_node_offset_addr() helper instead.
+ *
+ * RETURNS:
+ * Start address of @node for page-based addressing. 0 if the node does not
+ * have an offset allocated.
+ */
+static inline unsigned long drm_vma_node_start(struct drm_vma_offset_node *node)
+{
+	return node->vm_node.start;
+}
+
+/**
+ * drm_vma_node_size() - Return size (page-based)
+ * @node: Node to inspect
+ *
+ * Return the size as number of pages for the given node. This is the same size
+ * that was passed to drm_vma_offset_add(). If no offset is allocated for the
+ * node, this is 0.
+ *
+ * RETURNS:
+ * Size of @node as number of pages. 0 if the node does not have an offset
+ * allocated.
+ */
+static inline unsigned long drm_vma_node_size(struct drm_vma_offset_node *node)
+{
+	return node->vm_node.size;
+}
+
+/**
+ * drm_vma_node_has_offset() - Check whether node is added to offset manager
+ * @node: Node to be checked
+ *
+ * RETURNS:
+ * true iff the node was previously allocated an offset and added to
+ * an vma offset manager.
+ */
+static inline bool drm_vma_node_has_offset(struct drm_vma_offset_node *node)
+{
+	return drm_mm_node_allocated(&node->vm_node);
+}
+
+/**
+ * drm_vma_node_offset_addr() - Return sanitized offset for user-space mmaps
+ * @node: Linked offset node
+ *
+ * Same as drm_vma_node_start() but returns the address as a valid offset that
+ * can be used for user-space mappings during mmap().
+ * This must not be called on unlinked nodes.
+ *
+ * RETURNS:
+ * Offset of @node for byte-based addressing. 0 if the node does not have an
+ * object allocated.
+ */
+static inline __u64 drm_vma_node_offset_addr(struct drm_vma_offset_node *node)
+{
+	return ((__u64)node->vm_node.start) << PAGE_SHIFT;
+}
+
+/**
+ * drm_vma_node_unmap() - Unmap offset node
+ * @node: Offset node
+ * @file_mapping: Address space to unmap @node from
+ *
+ * Unmap all userspace mappings for a given offset node. The mappings must be
+ * associated with the @file_mapping address-space. If no offset exists or
+ * the address-space is invalid, nothing is done.
+ *
+ * This call is unlocked. The caller must guarantee that drm_vma_offset_remove()
+ * is not called on this node concurrently.
+ */
+static inline void drm_vma_node_unmap(struct drm_vma_offset_node *node,
+				      struct address_space *file_mapping)
+{
+	if (file_mapping && drm_vma_node_has_offset(node))
+		unmap_mapping_range(file_mapping,
+				    drm_vma_node_offset_addr(node),
+				    drm_vma_node_size(node) << PAGE_SHIFT, 1);
+}
+
+/**
+ * drm_vma_node_verify_access() - Access verification helper for TTM
+ * @node: Offset node
+ * @filp: Open-file
+ *
+ * This checks whether @filp is granted access to @node. It is the same as
+ * drm_vma_node_is_allowed() but suitable as drop-in helper for TTM
+ * verify_access() callbacks.
+ *
+ * RETURNS:
+ * 0 if access is granted, -EACCES otherwise.
+ */
+static inline int drm_vma_node_verify_access(struct drm_vma_offset_node *node,
+					     struct file *filp)
+{
+	return drm_vma_node_is_allowed(node, filp) ? 0 : -EACCES;
+}
+
+#endif /* __DRM_VMA_MANAGER_H__ */
diff --git a/include/drm/i2c/tda998x.h b/include/drm/i2c/tda998x.h
new file mode 100644
index 0000000..3e419d9
--- /dev/null
+++ b/include/drm/i2c/tda998x.h
@@ -0,0 +1,30 @@
+#ifndef __DRM_I2C_TDA998X_H__
+#define __DRM_I2C_TDA998X_H__
+
+struct tda998x_encoder_params {
+	u8 swap_b:3;
+	u8 mirr_b:1;
+	u8 swap_a:3;
+	u8 mirr_a:1;
+	u8 swap_d:3;
+	u8 mirr_d:1;
+	u8 swap_c:3;
+	u8 mirr_c:1;
+	u8 swap_f:3;
+	u8 mirr_f:1;
+	u8 swap_e:3;
+	u8 mirr_e:1;
+
+	u8 audio_cfg;
+	u8 audio_clk_cfg;
+	u8 audio_frame[6];
+
+	enum {
+		AFMT_SPDIF,
+		AFMT_I2S
+	} audio_format;
+
+	unsigned audio_sample_rate;
+};
+
+#endif
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 8a6aa56..751eaff 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -32,12 +32,12 @@
 #define _TTM_BO_API_H_
 
 #include <drm/drm_hashtab.h>
+#include <drm/drm_vma_manager.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/wait.h>
 #include <linux/mutex.h>
 #include <linux/mm.h>
-#include <linux/rbtree.h>
 #include <linux/bitmap.h>
 #include <linux/reservation.h>
 
@@ -145,7 +145,6 @@
  * @type: The bo type.
  * @destroy: Destruction function. If NULL, kfree is used.
  * @num_pages: Actual number of pages.
- * @addr_space_offset: Address space offset.
  * @acc_size: Accounted size for this object.
  * @kref: Reference count of this buffer object. When this refcount reaches
  * zero, the object is put on the delayed delete list.
@@ -166,8 +165,7 @@
  * @swap: List head for swap LRU list.
  * @sync_obj: Pointer to a synchronization object.
  * @priv_flags: Flags describing buffer object internal state.
- * @vm_rb: Rb node for the vm rb tree.
- * @vm_node: Address space manager node.
+ * @vma_node: Address space manager node.
  * @offset: The current GPU offset, which can have different meanings
  * depending on the memory type. For SYSTEM type memory, it should be 0.
  * @cur_placement: Hint of current placement.
@@ -194,7 +192,6 @@
 	enum ttm_bo_type type;
 	void (*destroy) (struct ttm_buffer_object *);
 	unsigned long num_pages;
-	uint64_t addr_space_offset;
 	size_t acc_size;
 
 	/**
@@ -238,13 +235,7 @@
 	void *sync_obj;
 	unsigned long priv_flags;
 
-	/**
-	 * Members protected by the bdev::vm_lock
-	 */
-
-	struct rb_node vm_rb;
-	struct drm_mm_node *vm_node;
-
+	struct drm_vma_offset_node vma_node;
 
 	/**
 	 * Special members that are protected by the reserve lock
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 984fc2d..8639c85 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -36,6 +36,7 @@
 #include <ttm/ttm_placement.h>
 #include <drm/drm_mm.h>
 #include <drm/drm_global.h>
+#include <drm/drm_vma_manager.h>
 #include <linux/workqueue.h>
 #include <linux/fs.h>
 #include <linux/spinlock.h>
@@ -519,7 +520,7 @@
  * @man: An array of mem_type_managers.
  * @fence_lock: Protects the synchronizing members on *all* bos belonging
  * to this device.
- * @addr_space_mm: Range manager for the device address space.
+ * @vma_manager: Address space manager
  * lru_lock: Spinlock that protects the buffer+device lru lists and
  * ddestroy lists.
  * @val_seq: Current validation sequence.
@@ -537,14 +538,13 @@
 	struct list_head device_list;
 	struct ttm_bo_global *glob;
 	struct ttm_bo_driver *driver;
-	rwlock_t vm_lock;
 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
 	spinlock_t fence_lock;
+
 	/*
-	 * Protected by the vm lock.
+	 * Protected by internal locks.
 	 */
-	struct rb_root addr_space_rb;
-	struct drm_mm addr_space_mm;
+	struct drm_vma_offset_manager vma_manager;
 
 	/*
 	 * Protected by the global:lru lock.
diff --git a/include/dt-bindings/clock/vf610-clock.h b/include/dt-bindings/clock/vf610-clock.h
index 15e997f..4aa2b48c 100644
--- a/include/dt-bindings/clock/vf610-clock.h
+++ b/include/dt-bindings/clock/vf610-clock.h
@@ -158,6 +158,8 @@
 #define VF610_CLK_GPU_SEL		145
 #define VF610_CLK_GPU_EN		146
 #define VF610_CLK_GPU2D			147
-#define VF610_CLK_END			148
+#define VF610_CLK_ENET0			148
+#define VF610_CLK_ENET1			149
+#define VF610_CLK_END			150
 
 #endif /* __DT_BINDINGS_CLOCK_VF610_H */
diff --git a/include/dt-bindings/pinctrl/am33xx.h b/include/dt-bindings/pinctrl/am33xx.h
index 469e032..2fbc804 100644
--- a/include/dt-bindings/pinctrl/am33xx.h
+++ b/include/dt-bindings/pinctrl/am33xx.h
@@ -5,7 +5,7 @@
 #ifndef _DT_BINDINGS_PINCTRL_AM33XX_H
 #define _DT_BINDINGS_PINCTRL_AM33XX_H
 
-#include <include/dt-bindings/pinctrl/omap.h>
+#include <dt-bindings/pinctrl/omap.h>
 
 /* am33xx specific mux bit defines */
 #undef PULL_ENA
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6ad72f9..353ba25 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -191,7 +191,6 @@
 #define ACPI_VIDEO_BACKLIGHT_DMI_VIDEO			0x0200
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VENDOR		0x0400
 #define ACPI_VIDEO_OUTPUT_SWITCHING_DMI_VIDEO		0x0800
-#define ACPI_VIDEO_SKIP_BACKLIGHT			0x1000
 
 #if defined(CONFIG_ACPI_VIDEO) || defined(CONFIG_ACPI_VIDEO_MODULE)
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 297462b..e9ac882 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -542,8 +542,7 @@
 bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
 
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
-				    char *buf, size_t buflen);
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
 
 int cgroup_task_count(const struct cgroup *cgrp);
 
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 6e7ec64..b613ffd 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -1,86 +1,55 @@
-/* Add subsystem definitions of the form SUBSYS(<name>) in this
- * file. Surround each one by a line of comment markers so that
- * patches don't collide
+/*
+ * List of cgroup subsystems.
+ *
+ * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
  */
-
-/* */
-
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CPUSETS)
 SUBSYS(cpuset)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG)
 SUBSYS(debug)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED)
 SUBSYS(cpu_cgroup)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT)
 SUBSYS(cpuacct)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_MEMCG)
 SUBSYS(mem_cgroup)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE)
 SUBSYS(devices)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER)
 SUBSYS(freezer)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP)
 SUBSYS(net_cls)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP)
 SUBSYS(blkio)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF)
 SUBSYS(perf)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP)
 SUBSYS(net_prio)
 #endif
 
-/* */
-
 #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
-
-/* */
-
-#ifdef CONFIG_CGROUP_BCACHE
-SUBSYS(bcache)
-#endif
-
-/* */
+/*
+ * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
+ */
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index b3cb71f..a9c96d8 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -3,10 +3,6 @@
 
 #include <linux/types.h>
 
-#define CRC_T10DIF_DIGEST_SIZE 2
-#define CRC_T10DIF_BLOCK_SIZE 1
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len);
 __u16 crc_t10dif(unsigned char const *, size_t);
 
 #endif
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index b90337c..4a12532 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -336,6 +336,7 @@
  * helper function for dentry_operations.d_dname() members
  */
 extern char *dynamic_dname(struct dentry *, char *, int, const char *, ...);
+extern char *simple_dname(struct dentry *, char *, int);
 
 extern char *__d_path(const struct path *, const struct path *, char *, int);
 extern char *d_absolute_path(const struct path *, char *, int);
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 1b4d4ee..de7d74a 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -177,7 +177,11 @@
 	ERR_NEED_APV_100	= 163,
 	ERR_NEED_ALLOW_TWO_PRI  = 164,
 	ERR_MD_UNCLEAN          = 165,
-
+	ERR_MD_LAYOUT_CONNECTED = 166,
+	ERR_MD_LAYOUT_TOO_BIG   = 167,
+	ERR_MD_LAYOUT_TOO_SMALL = 168,
+	ERR_MD_LAYOUT_NO_FIT    = 169,
+	ERR_IMPLICIT_SHRINK     = 170,
 	/* insert new ones above this line */
 	AFTER_LAST_ERR_CODE
 };
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index d0d8fac..e8c4457 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -181,6 +181,8 @@
 	__u64_field(1, DRBD_GENLA_F_MANDATORY,	resize_size)
 	__flg_field(2, DRBD_GENLA_F_MANDATORY,	resize_force)
 	__flg_field(3, DRBD_GENLA_F_MANDATORY,	no_resync)
+	__u32_field_def(4, 0 /* OPTIONAL */, al_stripes, DRBD_AL_STRIPES_DEF)
+	__u32_field_def(5, 0 /* OPTIONAL */, al_stripe_size, DRBD_AL_STRIPE_SIZE_DEF)
 )
 
 GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 1fedf2b..17e50bb 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -215,4 +215,13 @@
 #define DRBD_ALWAYS_ASBP_DEF	0
 #define DRBD_USE_RLE_DEF	1
 
+#define DRBD_AL_STRIPES_MIN     1
+#define DRBD_AL_STRIPES_MAX     1024
+#define DRBD_AL_STRIPES_DEF     1
+#define DRBD_AL_STRIPES_SCALE   '1'
+
+#define DRBD_AL_STRIPE_SIZE_MIN   4
+#define DRBD_AL_STRIPE_SIZE_MAX   16777216
+#define DRBD_AL_STRIPE_SIZE_DEF   32
+#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */
 #endif
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 0b76327..5c6d7fb 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -622,7 +622,7 @@
  */
 struct mem_ctl_info {
 	struct device			dev;
-	struct bus_type			bus;
+	struct bus_type			*bus;
 
 	struct list_head link;	/* for global list of mem_ctl_info structs */
 
@@ -742,4 +742,9 @@
 #endif
 };
 
+/*
+ * Maximum number of memory controllers in the coherent fabric.
+ */
+#define EDAC_MAX_MCS	16
+
 #endif
diff --git a/include/linux/firewire.h b/include/linux/firewire.h
index 3b0e820..5d7782e 100644
--- a/include/linux/firewire.h
+++ b/include/linux/firewire.h
@@ -436,6 +436,7 @@
 	int type;
 	int channel;
 	int speed;
+	bool drop_overflow_headers;
 	size_t header_size;
 	union {
 		fw_iso_callback_t sc;
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 4372658..120d57a 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -78,6 +78,11 @@
 	/* trace_seq for __print_flags() and __print_symbolic() etc. */
 	struct trace_seq	tmp_seq;
 
+	cpumask_var_t		started;
+
+	/* it's true when current open file is snapshot */
+	bool			snapshot;
+
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
@@ -90,10 +95,7 @@
 	loff_t			pos;
 	long			idx;
 
-	cpumask_var_t		started;
-
-	/* it's true when current open file is snapshot */
-	bool			snapshot;
+	/* All new field here will be zeroed out in pipe_read */
 };
 
 enum trace_iter_flags {
@@ -332,7 +334,7 @@
 			      const char *name, int offset, int size,
 			      int is_signed, int filter_type);
 extern int trace_add_event_call(struct ftrace_event_call *call);
-extern void trace_remove_event_call(struct ftrace_event_call *call);
+extern int trace_remove_event_call(struct ftrace_event_call *call);
 
 #define is_signed_type(type)	(((type)(-1)) < (type)1)
 
diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h
index 3b58944..9231be9 100644
--- a/include/linux/hdmi.h
+++ b/include/linux/hdmi.h
@@ -18,11 +18,21 @@
 	HDMI_INFOFRAME_TYPE_AUDIO = 0x84,
 };
 
+#define HDMI_IEEE_OUI 0x000c03
 #define HDMI_INFOFRAME_HEADER_SIZE  4
 #define HDMI_AVI_INFOFRAME_SIZE    13
 #define HDMI_SPD_INFOFRAME_SIZE    25
 #define HDMI_AUDIO_INFOFRAME_SIZE  10
 
+#define HDMI_INFOFRAME_SIZE(type)	\
+	(HDMI_INFOFRAME_HEADER_SIZE + HDMI_ ## type ## _INFOFRAME_SIZE)
+
+struct hdmi_any_infoframe {
+	enum hdmi_infoframe_type type;
+	unsigned char version;
+	unsigned char length;
+};
+
 enum hdmi_colorspace {
 	HDMI_COLORSPACE_RGB,
 	HDMI_COLORSPACE_YUV422,
@@ -100,9 +110,6 @@
 	unsigned char version;
 	unsigned char length;
 	enum hdmi_colorspace colorspace;
-	bool active_info_valid;
-	bool horizontal_bar_valid;
-	bool vertical_bar_valid;
 	enum hdmi_scan_mode scan_mode;
 	enum hdmi_colorimetry colorimetry;
 	enum hdmi_picture_aspect picture_aspect;
@@ -218,14 +225,52 @@
 ssize_t hdmi_audio_infoframe_pack(struct hdmi_audio_infoframe *frame,
 				  void *buffer, size_t size);
 
+enum hdmi_3d_structure {
+	HDMI_3D_STRUCTURE_INVALID = -1,
+	HDMI_3D_STRUCTURE_FRAME_PACKING = 0,
+	HDMI_3D_STRUCTURE_FIELD_ALTERNATIVE,
+	HDMI_3D_STRUCTURE_LINE_ALTERNATIVE,
+	HDMI_3D_STRUCTURE_SIDE_BY_SIDE_FULL,
+	HDMI_3D_STRUCTURE_L_DEPTH,
+	HDMI_3D_STRUCTURE_L_DEPTH_GFX_GFX_DEPTH,
+	HDMI_3D_STRUCTURE_TOP_AND_BOTTOM,
+	HDMI_3D_STRUCTURE_SIDE_BY_SIDE_HALF = 8,
+};
+
+
 struct hdmi_vendor_infoframe {
 	enum hdmi_infoframe_type type;
 	unsigned char version;
 	unsigned char length;
-	u8 data[27];
+	unsigned int oui;
+	u8 vic;
+	enum hdmi_3d_structure s3d_struct;
+	unsigned int s3d_ext_data;
 };
 
+int hdmi_vendor_infoframe_init(struct hdmi_vendor_infoframe *frame);
 ssize_t hdmi_vendor_infoframe_pack(struct hdmi_vendor_infoframe *frame,
 				   void *buffer, size_t size);
 
+union hdmi_vendor_any_infoframe {
+	struct {
+		enum hdmi_infoframe_type type;
+		unsigned char version;
+		unsigned char length;
+		unsigned int oui;
+	} any;
+	struct hdmi_vendor_infoframe hdmi;
+};
+
+union hdmi_infoframe {
+	struct hdmi_any_infoframe any;
+	struct hdmi_avi_infoframe avi;
+	struct hdmi_spd_infoframe spd;
+	union hdmi_vendor_any_infoframe vendor;
+	struct hdmi_audio_infoframe audio;
+};
+
+ssize_t
+hdmi_infoframe_pack(union hdmi_infoframe *frame, void *buffer, size_t size);
+
 #endif /* _DRM_HDMI_H */
diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h
index 3869c52..369cf2c 100644
--- a/include/linux/iio/trigger.h
+++ b/include/linux/iio/trigger.h
@@ -8,6 +8,7 @@
  */
 #include <linux/irq.h>
 #include <linux/module.h>
+#include <linux/atomic.h>
 
 #ifndef _IIO_TRIGGER_H_
 #define _IIO_TRIGGER_H_
@@ -61,7 +62,7 @@
 
 	struct list_head		list;
 	struct list_head		alloc_list;
-	int use_count;
+	atomic_t			use_count;
 
 	struct irq_chip			subirq_chip;
 	int				subirq_base;
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index b99cd23..79640e01 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -5,45 +5,13 @@
 
 #include <linux/bitmap.h>
 #include <linux/if.h>
+#include <linux/ip.h>
 #include <linux/netdevice.h>
 #include <linux/rcupdate.h>
 #include <linux/timer.h>
 #include <linux/sysctl.h>
 #include <linux/rtnetlink.h>
 
-enum
-{
-	IPV4_DEVCONF_FORWARDING=1,
-	IPV4_DEVCONF_MC_FORWARDING,
-	IPV4_DEVCONF_PROXY_ARP,
-	IPV4_DEVCONF_ACCEPT_REDIRECTS,
-	IPV4_DEVCONF_SECURE_REDIRECTS,
-	IPV4_DEVCONF_SEND_REDIRECTS,
-	IPV4_DEVCONF_SHARED_MEDIA,
-	IPV4_DEVCONF_RP_FILTER,
-	IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE,
-	IPV4_DEVCONF_BOOTP_RELAY,
-	IPV4_DEVCONF_LOG_MARTIANS,
-	IPV4_DEVCONF_TAG,
-	IPV4_DEVCONF_ARPFILTER,
-	IPV4_DEVCONF_MEDIUM_ID,
-	IPV4_DEVCONF_NOXFRM,
-	IPV4_DEVCONF_NOPOLICY,
-	IPV4_DEVCONF_FORCE_IGMP_VERSION,
-	IPV4_DEVCONF_ARP_ANNOUNCE,
-	IPV4_DEVCONF_ARP_IGNORE,
-	IPV4_DEVCONF_PROMOTE_SECONDARIES,
-	IPV4_DEVCONF_ARP_ACCEPT,
-	IPV4_DEVCONF_ARP_NOTIFY,
-	IPV4_DEVCONF_ACCEPT_LOCAL,
-	IPV4_DEVCONF_SRC_VMARK,
-	IPV4_DEVCONF_PROXY_ARP_PVLAN,
-	IPV4_DEVCONF_ROUTE_LOCALNET,
-	__IPV4_DEVCONF_MAX
-};
-
-#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1)
-
 struct ipv4_devconf {
 	void	*sysctl;
 	int	data[IPV4_DEVCONF_MAX];
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 850e95b..b8b7dc7 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -101,6 +101,7 @@
 #define IP6SKB_FORWARDED	2
 #define IP6SKB_REROUTED		4
 #define IP6SKB_ROUTERALERT	8
+#define IP6SKB_FRAGMENTED      16
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3bef14c..482ad2d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -629,7 +629,7 @@
 static inline void tracing_start(void) { }
 static inline void tracing_stop(void) { }
 static inline void ftrace_off_permanent(void) { }
-static inline void trace_dump_stack(void) { }
+static inline void trace_dump_stack(int skip) { }
 
 static inline void tracing_on(void) { }
 static inline void tracing_off(void) { }
diff --git a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
index dab34a1..b6bdcd6 100644
--- a/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
+++ b/include/linux/mfd/syscon/imx6q-iomuxc-gpr.h
@@ -103,15 +103,15 @@
 #define IMX6Q_GPR1_EXC_MON_MASK			BIT(22)
 #define IMX6Q_GPR1_EXC_MON_OKAY			0x0
 #define IMX6Q_GPR1_EXC_MON_SLVE			BIT(22)
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_MASK		BIT(21)
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU2_SEL_IOMUX		BIT(21)
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK		BIT(20)
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX		BIT(20)
-#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK		BIT(19)
+#define IMX6Q_GPR1_ENET_CLK_SEL_MASK		BIT(21)
+#define IMX6Q_GPR1_ENET_CLK_SEL_PAD		0
+#define IMX6Q_GPR1_ENET_CLK_SEL_ANATOP		BIT(21)
+#define IMX6Q_GPR1_MIPI_IPU2_MUX_MASK		BIT(20)
 #define IMX6Q_GPR1_MIPI_IPU2_MUX_GASKET		0x0
-#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX		BIT(19)
+#define IMX6Q_GPR1_MIPI_IPU2_MUX_IOMUX		BIT(20)
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_MASK		BIT(19)
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_GASKET		0x0
+#define IMX6Q_GPR1_MIPI_IPU1_MUX_IOMUX		BIT(19)
 #define IMX6Q_GPR1_PCIE_TEST_PD			BIT(18)
 #define IMX6Q_GPR1_IPU_VPU_MUX_MASK		BIT(17)
 #define IMX6Q_GPR1_IPU_VPU_MUX_IPU1		0x0
@@ -279,41 +279,88 @@
 #define IMX6Q_GPR13_CAN2_STOP_REQ		BIT(29)
 #define IMX6Q_GPR13_CAN1_STOP_REQ		BIT(28)
 #define IMX6Q_GPR13_ENET_STOP_REQ		BIT(27)
-#define IMX6Q_GPR13_SATA_PHY_8_MASK		(0x7 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_0_5_DB		(0x0 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_1_0_DB		(0x1 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_1_5_DB		(0x2 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_2_0_DB		(0x3 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_2_5_DB		(0x4 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_3_0_DB		(0x5 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_3_5_DB		(0x6 << 24)
-#define IMX6Q_GPR13_SATA_PHY_8_4_0_DB		(0x7 << 24)
-#define IMX6Q_GPR13_SATA_PHY_7_MASK		(0x1f << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1I		(0x10 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1M		(0x10 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA1X		(0x1a << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2I		(0x12 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2M		(0x12 << 19)
-#define IMX6Q_GPR13_SATA_PHY_7_SATA2X		(0x1a << 19)
-#define IMX6Q_GPR13_SATA_PHY_6_MASK		(0x7 << 16)
-#define IMX6Q_GPR13_SATA_SPEED_MASK		BIT(15)
-#define IMX6Q_GPR13_SATA_SPEED_1P5G		0x0
-#define IMX6Q_GPR13_SATA_SPEED_3P0G		BIT(15)
-#define IMX6Q_GPR13_SATA_PHY_5			BIT(14)
-#define IMX6Q_GPR13_SATA_PHY_4_MASK		(0x7 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_16_16		(0x0 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_14_16		(0x1 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_12_16		(0x2 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_10_16		(0x3 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_9_16		(0x4 << 11)
-#define IMX6Q_GPR13_SATA_PHY_4_8_16		(0x5 << 11)
-#define IMX6Q_GPR13_SATA_PHY_3_MASK		(0xf << 7)
-#define IMX6Q_GPR13_SATA_PHY_3_OFF		0x7
-#define IMX6Q_GPR13_SATA_PHY_2_MASK		(0x1f << 2)
-#define IMX6Q_GPR13_SATA_PHY_2_OFF		0x2
-#define IMX6Q_GPR13_SATA_PHY_1_MASK		(0x3 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_FAST		(0x0 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_MED		(0x1 << 0)
-#define IMX6Q_GPR13_SATA_PHY_1_SLOW		(0x2 << 0)
-
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_MASK		(0x7 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_0_5_DB	(0x0 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_0_DB	(0x1 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_1_5_DB	(0x2 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_0_DB	(0x3 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_2_5_DB	(0x4 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_0_DB	(0x5 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_3_5_DB	(0x6 << 24)
+#define IMX6Q_GPR13_SATA_RX_EQ_VAL_4_0_DB	(0x7 << 24)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_MASK	(0x1f << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1I	(0x10 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1M	(0x10 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA1X	(0x1a << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2I	(0x12 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2M	(0x12 << 19)
+#define IMX6Q_GPR13_SATA_RX_LOS_LVL_SATA2X	(0x1a << 19)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_MASK	(0x7 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_1F	(0x0 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_2F	(0x1 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_1P_4F	(0x2 << 16)
+#define IMX6Q_GPR13_SATA_RX_DPLL_MODE_2P_4F	(0x3 << 16)
+#define IMX6Q_GPR13_SATA_SPD_MODE_MASK		BIT(15)
+#define IMX6Q_GPR13_SATA_SPD_MODE_1P5G		0x0
+#define IMX6Q_GPR13_SATA_SPD_MODE_3P0G		BIT(15)
+#define IMX6Q_GPR13_SATA_MPLL_SS_EN		BIT(14)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_MASK		(0x7 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_16_16		(0x0 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_14_16		(0x1 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_12_16		(0x2 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_10_16		(0x3 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_9_16		(0x4 << 11)
+#define IMX6Q_GPR13_SATA_TX_ATTEN_8_16		(0x5 << 11)
+#define IMX6Q_GPR13_SATA_TX_BOOST_MASK		(0xf << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_00_DB	(0x0 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_37_DB	(0x1 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_0_74_DB	(0x2 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_11_DB	(0x3 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_48_DB	(0x4 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_1_85_DB	(0x5 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_22_DB	(0x6 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_59_DB	(0x7 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_2_96_DB	(0x8 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_3_33_DB	(0x9 << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_3_70_DB	(0xa << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_07_DB	(0xb << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_44_DB	(0xc << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_4_81_DB	(0xd << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_5_28_DB	(0xe << 7)
+#define IMX6Q_GPR13_SATA_TX_BOOST_5_75_DB	(0xf << 7)
+#define IMX6Q_GPR13_SATA_TX_LVL_MASK		(0x1f << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_937_V		(0x00 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_947_V		(0x01 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_957_V		(0x02 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_966_V		(0x03 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_976_V		(0x04 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_986_V		(0x05 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_0_996_V		(0x06 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_005_V		(0x07 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_015_V		(0x08 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_025_V		(0x09 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_035_V		(0x0a << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_045_V		(0x0b << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_054_V		(0x0c << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_064_V		(0x0d << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_074_V		(0x0e << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_084_V		(0x0f << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_094_V		(0x10 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_104_V		(0x11 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_113_V		(0x12 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_123_V		(0x13 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_133_V		(0x14 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_143_V		(0x15 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_152_V		(0x16 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_162_V		(0x17 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_172_V		(0x18 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_182_V		(0x19 << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_191_V		(0x1a << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_201_V		(0x1b << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_211_V		(0x1c << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_221_V		(0x1d << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_230_V		(0x1e << 2)
+#define IMX6Q_GPR13_SATA_TX_LVL_1_240_V		(0x1f << 2)
+#define IMX6Q_GPR13_SATA_MPLL_CLK_EN		BIT(1)
+#define IMX6Q_GPR13_SATA_TX_EDGE_RATE		BIT(0)
 #endif /* __LINUX_IMX6Q_IOMUXC_GPR_H */
diff --git a/include/linux/mfd/ti_am335x_tscadc.h b/include/linux/mfd/ti_am335x_tscadc.h
index 8d73fe2..db1791b 100644
--- a/include/linux/mfd/ti_am335x_tscadc.h
+++ b/include/linux/mfd/ti_am335x_tscadc.h
@@ -113,11 +113,27 @@
 #define CNTRLREG_8WIRE		CNTRLREG_AFE_CTRL(3)
 #define CNTRLREG_TSCENB		BIT(7)
 
+/* FIFO READ Register */
+#define FIFOREAD_DATA_MASK (0xfff << 0)
+#define FIFOREAD_CHNLID_MASK (0xf << 16)
+
+/* Sequencer Status */
+#define SEQ_STATUS BIT(5)
+
 #define ADC_CLK			3000000
 #define	MAX_CLK_DIV		7
 #define TOTAL_STEPS		16
 #define TOTAL_CHANNELS		8
 
+/*
+* ADC runs at 3MHz, and it takes
+* 15 cycles to latch one data output.
+* Hence the idle time for ADC to
+* process one sample data would be
+* around 5 micro seconds.
+*/
+#define IDLE_TIMEOUT 5 /* microsec */
+
 #define TSCADC_CELLS		2
 
 struct ti_tscadc_dev {
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 8de8d8f..68029b3 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -309,21 +309,20 @@
 	__be16	max_desc_sz_rq;
 	u8	rsvd21[2];
 	__be16	max_desc_sz_sq_dc;
-	u8	rsvd22[4];
-	__be16	max_qp_mcg;
-	u8	rsvd23;
+	__be32	max_qp_mcg;
+	u8	rsvd22[3];
 	u8	log_max_mcg;
-	u8	rsvd24;
+	u8	rsvd23;
 	u8	log_max_pd;
-	u8	rsvd25;
+	u8	rsvd24;
 	u8	log_max_xrcd;
-	u8	rsvd26[42];
+	u8	rsvd25[42];
 	__be16  log_uar_page_sz;
-	u8	rsvd27[28];
+	u8	rsvd26[28];
 	u8	log_msx_atomic_size_qp;
-	u8	rsvd28[2];
+	u8	rsvd27[2];
 	u8	log_msx_atomic_size_dc;
-	u8	rsvd29[76];
+	u8	rsvd28[76];
 };
 
 
@@ -472,9 +471,8 @@
 struct mlx5_eqe_page_req {
 	u8		rsvd0[2];
 	__be16		func_id;
-	u8		rsvd1[2];
-	__be16		num_pages;
-	__be32		rsvd2[5];
+	__be32		num_pages;
+	__be32		rsvd1[5];
 };
 
 union ev_data {
@@ -690,6 +688,26 @@
 	__be64			pas[0];
 };
 
+struct mlx5_enable_hca_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_enable_hca_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_disable_hca_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
+struct mlx5_disable_hca_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
 struct mlx5_eq_context {
 	u8			status;
 	u8			ec_oi;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index f22e441..8888381 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -101,6 +101,8 @@
 	MLX5_CMD_OP_QUERY_ADAPTER		= 0x101,
 	MLX5_CMD_OP_INIT_HCA			= 0x102,
 	MLX5_CMD_OP_TEARDOWN_HCA		= 0x103,
+	MLX5_CMD_OP_ENABLE_HCA			= 0x104,
+	MLX5_CMD_OP_DISABLE_HCA			= 0x105,
 	MLX5_CMD_OP_QUERY_PAGES			= 0x107,
 	MLX5_CMD_OP_MANAGE_PAGES		= 0x108,
 	MLX5_CMD_OP_SET_HCA_CAP			= 0x109,
@@ -356,7 +358,7 @@
 	u32	reserved_lkey;
 	u8	local_ca_ack_delay;
 	u8	log_max_mcg;
-	u16	max_qp_mcg;
+	u32	max_qp_mcg;
 	int	min_page_sz;
 };
 
@@ -689,8 +691,8 @@
 int mlx5_pagealloc_start(struct mlx5_core_dev *dev);
 void mlx5_pagealloc_stop(struct mlx5_core_dev *dev);
 void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
-				 s16 npages);
-int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev);
+				 s32 npages);
+int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot);
 int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev);
 void mlx5_register_debugfs(void);
 void mlx5_unregister_debugfs(void);
@@ -729,9 +731,6 @@
 int mlx5_db_alloc(struct mlx5_core_dev *dev, struct mlx5_db *db);
 void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
 
-typedef void (*health_handler_t)(struct pci_dev *pdev, struct health_buffer __iomem *buf, int size);
-int mlx5_register_health_report_handler(health_handler_t handler);
-void mlx5_unregister_health_report_handler(void);
 const char *mlx5_command_str(int command);
 int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev);
 void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index fb425aa..faf4b7c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -332,6 +332,7 @@
 				unsigned long pgoff, unsigned long flags);
 #endif
 	unsigned long mmap_base;		/* base of mmap area */
+	unsigned long mmap_legacy_base;         /* base of mmap area in bottom-up allocations */
 	unsigned long task_size;		/* size of task vm space */
 	unsigned long highest_vm_end;		/* highest vma end address */
 	pgd_t * pgd;
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index b62d4af..45e9214 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -361,7 +361,8 @@
 	__u16	vendor;
 	__u16	coreid;
 	__u8	revision;
-};
+	__u8	__pad;
+} __attribute__((packed, aligned(2)));
 #define SSB_DEVICE(_vendor, _coreid, _revision)  \
 	{ .vendor = _vendor, .coreid = _coreid, .revision = _revision, }
 #define SSB_DEVTABLE_END  \
@@ -377,7 +378,7 @@
 	__u16	id;
 	__u8	rev;
 	__u8	class;
-};
+} __attribute__((packed,aligned(2)));
 #define BCMA_CORE(_manuf, _id, _rev, _class)  \
 	{ .manuf = _manuf, .id = _id, .rev = _rev, .class = _class, }
 #define BCMA_CORETABLE_END  \
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0741a1e..9a41568 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -973,7 +973,7 @@
 						     gfp_t gfp);
 	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	int			(*ndo_busy_poll)(struct napi_struct *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
diff --git a/include/linux/platform_data/mmc-pxamci.h b/include/linux/platform_data/mmc-pxamci.h
index 9eb515b..1706b35 100644
--- a/include/linux/platform_data/mmc-pxamci.h
+++ b/include/linux/platform_data/mmc-pxamci.h
@@ -12,7 +12,7 @@
 	unsigned long detect_delay_ms;		/* delay in millisecond before detecting cards after interrupt */
 	int (*init)(struct device *, irq_handler_t , void *);
 	int (*get_ro)(struct device *);
-	void (*setpower)(struct device *, unsigned int);
+	int (*setpower)(struct device *, unsigned int);
 	void (*exit)(struct device *, void *);
 	int gpio_card_detect;			/* gpio detecting card insertion */
 	int gpio_card_ro;			/* gpio detecting read only toggle */
diff --git a/include/linux/platform_data/rcar-du.h b/include/linux/platform_data/rcar-du.h
index 80587fd..1a2e990 100644
--- a/include/linux/platform_data/rcar-du.h
+++ b/include/linux/platform_data/rcar-du.h
@@ -16,8 +16,18 @@
 
 #include <drm/drm_mode.h>
 
+enum rcar_du_output {
+	RCAR_DU_OUTPUT_DPAD0,
+	RCAR_DU_OUTPUT_DPAD1,
+	RCAR_DU_OUTPUT_LVDS0,
+	RCAR_DU_OUTPUT_LVDS1,
+	RCAR_DU_OUTPUT_TCON,
+	RCAR_DU_OUTPUT_MAX,
+};
+
 enum rcar_du_encoder_type {
 	RCAR_DU_ENCODER_UNUSED = 0,
+	RCAR_DU_ENCODER_NONE,
 	RCAR_DU_ENCODER_VGA,
 	RCAR_DU_ENCODER_LVDS,
 };
@@ -28,22 +38,32 @@
 	struct drm_mode_modeinfo mode;
 };
 
-struct rcar_du_encoder_lvds_data {
+struct rcar_du_connector_lvds_data {
 	struct rcar_du_panel_data panel;
 };
 
-struct rcar_du_encoder_vga_data {
+struct rcar_du_connector_vga_data {
 	/* TODO: Add DDC information for EDID retrieval */
 };
 
+/*
+ * struct rcar_du_encoder_data - Encoder platform data
+ * @type: the encoder type (RCAR_DU_ENCODER_*)
+ * @output: the DU output the connector is connected to (RCAR_DU_OUTPUT_*)
+ * @connector.lvds: platform data for LVDS connectors
+ * @connector.vga: platform data for VGA connectors
+ *
+ * Encoder platform data describes an on-board encoder, its associated DU SoC
+ * output, and the connector.
+ */
 struct rcar_du_encoder_data {
-	enum rcar_du_encoder_type encoder;
-	unsigned int output;
+	enum rcar_du_encoder_type type;
+	enum rcar_du_output output;
 
 	union {
-		struct rcar_du_encoder_lvds_data lvds;
-		struct rcar_du_encoder_vga_data vga;
-	} u;
+		struct rcar_du_connector_lvds_data lvds;
+		struct rcar_du_connector_vga_data vga;
+	} connector;
 };
 
 struct rcar_du_platform_data {
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 75981d0..580a532 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -15,6 +15,7 @@
 
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <linux/err.h>
 
 struct module;
 struct device;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 50d04b9..078066d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1532,6 +1532,8 @@
  * Test if a process is not yet dead (at most zombie state)
  * If pid_alive fails, then pointers within the task structure
  * can be stale and must not be dereferenced.
+ *
+ * Return: 1 if the process is alive. 0 otherwise.
  */
 static inline int pid_alive(struct task_struct *p)
 {
@@ -1543,6 +1545,8 @@
  * @tsk: Task structure to be checked.
  *
  * Check if a task structure is the first user space task the kernel created.
+ *
+ * Return: 1 if the task structure is init. 0 otherwise.
  */
 static inline int is_global_init(struct task_struct *tsk)
 {
@@ -1628,6 +1632,7 @@
 #define PF_MEMPOLICY	0x10000000	/* Non-default NUMA mempolicy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezable */
+#define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */
 
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
@@ -1893,6 +1898,8 @@
 /**
  * is_idle_task - is the specified task an idle task?
  * @p: the task in question.
+ *
+ * Return: 1 if @p is an idle task. 0 otherwise.
  */
 static inline bool is_idle_task(const struct task_struct *p)
 {
diff --git a/include/linux/shdma-base.h b/include/linux/shdma-base.h
index 382cf71..5b1c984 100644
--- a/include/linux/shdma-base.h
+++ b/include/linux/shdma-base.h
@@ -124,6 +124,10 @@
 int shdma_init(struct device *dev, struct shdma_dev *sdev,
 		    int chan_num);
 void shdma_cleanup(struct shdma_dev *sdev);
+#if IS_ENABLED(CONFIG_SH_DMAE_BASE)
 bool shdma_chan_filter(struct dma_chan *chan, void *arg);
+#else
+#define shdma_chan_filter NULL
+#endif
 
 #endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5afefa0..3b71a4e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -501,7 +501,7 @@
 	/* 7/9 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
-#if defined CONFIG_NET_DMA || defined CONFIG_NET_LL_RX_POLL
+#if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
 	union {
 		unsigned int	napi_id;
 		dma_cookie_t	dma_cookie;
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 7d537ce..75f3494 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -117,9 +117,17 @@
 #endif /*arch_spin_is_contended*/
 #endif
 
-/* The lock does not imply full memory barrier. */
-#ifndef ARCH_HAS_SMP_MB_AFTER_LOCK
-static inline void smp_mb__after_lock(void) { smp_mb(); }
+/*
+ * Despite its name it doesn't necessarily has to be a full barrier.
+ * It should only guarantee that a STORE before the critical section
+ * can not be reordered with a LOAD inside this section.
+ * spin_lock() is the one-way barrier, this LOAD can not escape out
+ * of the region. So the default implementation simply ensures that
+ * a STORE can not move into the critical section, smp_wmb() should
+ * serialize it with another STORE done by spin_lock().
+ */
+#ifndef smp_mb__before_spinlock
+#define smp_mb__before_spinlock()	smp_wmb()
 #endif
 
 /**
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 6d87035..1821445 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -121,6 +121,7 @@
 #define RPC_TASK_SOFTCONN	0x0400		/* Fail if can't connect */
 #define RPC_TASK_SENT		0x0800		/* message was sent */
 #define RPC_TASK_TIMEOUT	0x1000		/* fail with ETIMEDOUT on timeout */
+#define RPC_TASK_NOCONNECT	0x2000		/* return ENOTCONN if not connected */
 
 #define RPC_IS_ASYNC(t)		((t)->tk_flags & RPC_TASK_ASYNC)
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index c5fd30d..8d4fa82 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -67,6 +67,8 @@
 	swp_entry_t arch_entry;
 
 	BUG_ON(pte_file(pte));
+	if (pte_swp_soft_dirty(pte))
+		pte = pte_swp_clear_soft_dirty(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 4147d70..84662ec 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -802,9 +802,14 @@
 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
 	       int __user *);
 #else
+#ifdef CONFIG_CLONE_BACKWARDS3
+asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *,
+			  int __user *, int);
+#else
 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
 	       int __user *, int);
 #endif
+#endif
 
 asmlinkage long sys_execve(const char __user *filename,
 		const char __user *const __user *argv,
diff --git a/include/linux/tick.h b/include/linux/tick.h
index 9180f4b..62bd8b7 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -174,10 +174,4 @@
 #endif
 
 
-# ifdef CONFIG_CPU_IDLE_GOV_MENU
-extern void menu_hrtimer_cancel(void);
-# else
-static inline void menu_hrtimer_cancel(void) {}
-# endif /* CONFIG_CPU_IDLE_GOV_MENU */
-
 #endif
diff --git a/include/linux/usb.h b/include/linux/usb.h
index a232b7e..0eec268 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -367,17 +367,6 @@
 
 /* ----------------------------------------------------------------------- */
 
-/* This is arbitrary.
- * From USB 2.0 spec Table 11-13, offset 7, a hub can
- * have up to 255 ports. The most yet reported is 10.
- *
- * Current Wireless USB host hardware (Intel i1480 for example) allows
- * up to 22 devices to connect. Upcoming hardware might raise that
- * limit. Because the arrays need to add a bit for hub status data, we
- * do 31, so plus one evens out to four bytes.
- */
-#define USB_MAXCHILDREN		(31)
-
 struct usb_tt;
 
 enum usb_device_removable {
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b6b215f..14105c2 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -23,6 +23,7 @@
 	struct uid_gid_map	projid_map;
 	atomic_t		count;
 	struct user_namespace	*parent;
+	int			level;
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index ddb419c..502073a 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -45,7 +45,8 @@
 #if defined(CONFIG_VGA_SWITCHEROO)
 void vga_switcheroo_unregister_client(struct pci_dev *dev);
 int vga_switcheroo_register_client(struct pci_dev *dev,
-				   const struct vga_switcheroo_client_ops *ops);
+				   const struct vga_switcheroo_client_ops *ops,
+				   bool driver_power_control);
 int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 					 const struct vga_switcheroo_client_ops *ops,
 					 int id, bool active);
@@ -60,11 +61,15 @@
 
 int vga_switcheroo_get_client_state(struct pci_dev *dev);
 
+void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic);
+
+int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain);
+int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain);
 #else
 
 static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {}
 static inline int vga_switcheroo_register_client(struct pci_dev *dev,
-		const struct vga_switcheroo_client_ops *ops) { return 0; }
+		const struct vga_switcheroo_client_ops *ops, bool driver_power_control) { return 0; }
 static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {}
 static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; }
 static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
@@ -74,6 +79,10 @@
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
 static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; }
 
+static inline void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic) {}
+
+static inline int vga_switcheroo_init_domain_pm_ops(struct device *dev, struct dev_pm_domain *domain) { return -EINVAL; }
+static inline int vga_switcheroo_init_domain_pm_optimus_hdmi_audio(struct device *dev, struct dev_pm_domain *domain) { return -EINVAL; }
 
 #endif
 #endif /* _LINUX_VGA_SWITCHEROO_H_ */
diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h
index 76be077..7dc17e2 100644
--- a/include/linux/vmpressure.h
+++ b/include/linux/vmpressure.h
@@ -12,7 +12,7 @@
 	unsigned long scanned;
 	unsigned long reclaimed;
 	/* The lock is used to keep the scanned/reclaimed above in sync. */
-	struct mutex sr_lock;
+	struct spinlock sr_lock;
 
 	/* The list of vmpressure_event structs. */
 	struct list_head events;
@@ -30,6 +30,7 @@
 extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
 
 extern void vmpressure_init(struct vmpressure *vmpr);
+extern void vmpressure_cleanup(struct vmpressure *vmpr);
 extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
 extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
 extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index f487a47..a67fc16 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -811,6 +811,63 @@
 	__ret;								\
 })
 
+#define __wait_event_interruptible_lock_irq_timeout(wq, condition,	\
+						    lock, ret)		\
+do {									\
+	DEFINE_WAIT(__wait);						\
+									\
+	for (;;) {							\
+		prepare_to_wait(&wq, &__wait, TASK_INTERRUPTIBLE);	\
+		if (condition)						\
+			break;						\
+		if (signal_pending(current)) {				\
+			ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		spin_unlock_irq(&lock);					\
+		ret = schedule_timeout(ret);				\
+		spin_lock_irq(&lock);					\
+		if (!ret)						\
+			break;						\
+	}								\
+	finish_wait(&wq, &__wait);					\
+} while (0)
+
+/**
+ * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets true or a timeout elapses.
+ *		The condition is checked under the lock. This is expected
+ *		to be called with the lock taken.
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @lock: a locked spinlock_t, which will be released before schedule()
+ *	  and reacquired afterwards.
+ * @timeout: timeout, in jiffies
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or signal is received. The @condition is
+ * checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * This is supposed to be called while holding the lock. The lock is
+ * dropped before going to sleep and is reacquired afterwards.
+ *
+ * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it
+ * was interrupted by a signal, and the remaining jiffies otherwise
+ * if the condition evaluated to true before the timeout elapsed.
+ */
+#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock,	\
+						  timeout)		\
+({									\
+	int __ret = timeout;						\
+									\
+	if (!(condition))						\
+		__wait_event_interruptible_lock_irq_timeout(		\
+					wq, condition, lock, __ret);	\
+	__ret;								\
+})
+
 
 /*
  * These are the old interfaces to sleep waiting for an event.
diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h
index 7343a27..47ada23 100644
--- a/include/media/v4l2-ctrls.h
+++ b/include/media/v4l2-ctrls.h
@@ -22,6 +22,7 @@
 #define _V4L2_CTRLS_H
 
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/videodev2.h>
 
 /* forward references */
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index a14339c..8a358a2 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -27,7 +27,7 @@
 #include <linux/netdevice.h>
 #include <net/ip.h>
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 
 struct napi_struct;
 extern unsigned int sysctl_net_busy_read __read_mostly;
@@ -122,7 +122,7 @@
 		if (rc > 0)
 			/* local bh are disabled so it is ok to use _BH */
 			NET_ADD_STATS_BH(sock_net(sk),
-					 LINUX_MIB_LOWLATENCYRXPACKETS, rc);
+					 LINUX_MIB_BUSYPOLLRXPACKETS, rc);
 
 	} while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
 		 !need_resched() && !busy_loop_timeout(end_time));
@@ -146,7 +146,7 @@
 	sk->sk_napi_id = skb->napi_id;
 }
 
-#else /* CONFIG_NET_LL_RX_POLL */
+#else /* CONFIG_NET_RX_BUSY_POLL */
 static inline unsigned long net_busy_loop_on(void)
 {
 	return 0;
@@ -162,11 +162,6 @@
 	return false;
 }
 
-static inline bool sk_busy_poll(struct sock *sk, int nonblock)
-{
-	return false;
-}
-
 static inline void skb_mark_napi_id(struct sk_buff *skb,
 				    struct napi_struct *napi)
 {
@@ -181,5 +176,10 @@
 	return true;
 }
 
-#endif /* CONFIG_NET_LL_RX_POLL */
+static inline bool sk_busy_loop(struct sock *sk, int nonblock)
+{
+	return false;
+}
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
 #endif /* _LINUX_NET_BUSY_POLL_H */
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 2a601e7..48ec25a 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -300,7 +300,7 @@
 						struct nl_info *info);
 
 extern void			fib6_run_gc(unsigned long expires,
-					    struct net *net);
+					    struct net *net, bool force);
 
 extern void			fib6_gc_cleanup(void);
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 260f83f..f667248 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -135,6 +135,8 @@
 extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk,
 			       __be32 mtu);
 extern void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark);
+extern void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+				   u32 mark);
 extern void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk);
 
 struct netlink_callback;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 781b3cf..a354db5 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -145,20 +145,6 @@
 	return INET_ECN_encapsulate(tos, inner);
 }
 
-static inline void tunnel_ip_select_ident(struct sk_buff *skb,
-					  const struct iphdr  *old_iph,
-					  struct dst_entry *dst)
-{
-	struct iphdr *iph = ip_hdr(skb);
-
-	/* Use inner packet iph-id if possible. */
-	if (skb->protocol == htons(ETH_P_IP) && old_iph->id)
-		iph->id	= old_iph->id;
-	else
-		__ip_select_ident(iph, dst,
-				  (skb_shinfo(skb)->gso_segs ?: 1) - 1);
-}
-
 int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
 int iptunnel_xmit(struct net *net, struct rtable *rt,
 		  struct sk_buff *skb,
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 949d775..6fea323 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -119,7 +119,7 @@
  * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
  * also need a pad of 2.
  */
-static int ndisc_addr_option_pad(unsigned short type)
+static inline int ndisc_addr_option_pad(unsigned short type)
 {
 	switch (type) {
 	case ARPHRD_INFINIBAND: return 2;
diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h
index 0af851c..b64b7bc 100644
--- a/include/net/nfc/hci.h
+++ b/include/net/nfc/hci.h
@@ -59,7 +59,7 @@
 			      struct nfc_target *target);
 	int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event,
 			      struct sk_buff *skb);
-	int (*fw_upload)(struct nfc_hci_dev *hdev, const char *firmware_name);
+	int (*fw_download)(struct nfc_hci_dev *hdev, const char *firmware_name);
 	int (*discover_se)(struct nfc_hci_dev *dev);
 	int (*enable_se)(struct nfc_hci_dev *dev, u32 se_idx);
 	int (*disable_se)(struct nfc_hci_dev *dev, u32 se_idx);
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 0e353f1..5f286b7 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -68,7 +68,7 @@
 			     void *cb_context);
 	int (*tm_send)(struct nfc_dev *dev, struct sk_buff *skb);
 	int (*check_presence)(struct nfc_dev *dev, struct nfc_target *target);
-	int (*fw_upload)(struct nfc_dev *dev, const char *firmware_name);
+	int (*fw_download)(struct nfc_dev *dev, const char *firmware_name);
 
 	/* Secure Element API */
 	int (*discover_se)(struct nfc_dev *dev);
@@ -127,7 +127,7 @@
 	int targets_generation;
 	struct device dev;
 	bool dev_up;
-	bool fw_upload_in_progress;
+	bool fw_download_in_progress;
 	u8 rf_mode;
 	bool polling;
 	struct nfc_target *active_target;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6eab6336..e5ae0c5 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -683,13 +683,19 @@
 	u64	rate_bytes_ps; /* bytes per second */
 	u32	mult;
 	u16	overhead;
+	u8	linklayer;
 	u8	shift;
 };
 
 static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
 				unsigned int len)
 {
-	return ((u64)(len + r->overhead) * r->mult) >> r->shift;
+	len += r->overhead;
+
+	if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
+		return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
+
+	return ((u64)len * r->mult) >> r->shift;
 }
 
 extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf);
@@ -700,6 +706,7 @@
 	memset(res, 0, sizeof(*res));
 	res->rate = r->rate_bytes_ps;
 	res->overhead = r->overhead;
+	res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
 }
 
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 95a5a2c..31d5cfb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -327,7 +327,7 @@
 #ifdef CONFIG_RPS
 	__u32			sk_rxhash;
 #endif
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	unsigned int		sk_napi_id;
 	unsigned int		sk_ll_usec;
 #endif
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index 3cc5a0b..5ebda97 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -9,9 +9,7 @@
 struct search;
 
 DECLARE_EVENT_CLASS(bcache_request,
-
 	TP_PROTO(struct search *s, struct bio *bio),
-
 	TP_ARGS(s, bio),
 
 	TP_STRUCT__entry(
@@ -22,7 +20,6 @@
 		__field(dev_t,		orig_sector		)
 		__field(unsigned int,	nr_sector		)
 		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
 	),
 
 	TP_fast_assign(
@@ -33,36 +30,66 @@
 		__entry->orig_sector	= bio->bi_sector - 16;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
-	TP_printk("%d,%d %s %llu + %u [%s] (from %d,%d @ %llu)",
+	TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm,
-		  __entry->orig_major, __entry->orig_minor,
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->orig_major, __entry->orig_minor,
 		  (unsigned long long)__entry->orig_sector)
 );
 
+DECLARE_EVENT_CLASS(bkey,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k),
+
+	TP_STRUCT__entry(
+		__field(u32,	size				)
+		__field(u32,	inode				)
+		__field(u64,	offset				)
+		__field(bool,	dirty				)
+	),
+
+	TP_fast_assign(
+		__entry->inode	= KEY_INODE(k);
+		__entry->offset	= KEY_OFFSET(k);
+		__entry->size	= KEY_SIZE(k);
+		__entry->dirty	= KEY_DIRTY(k);
+	),
+
+	TP_printk("%u:%llu len %u dirty %u", __entry->inode,
+		  __entry->offset, __entry->size, __entry->dirty)
+);
+
+DECLARE_EVENT_CLASS(btree_node,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b),
+
+	TP_STRUCT__entry(
+		__field(size_t,		bucket			)
+	),
+
+	TP_fast_assign(
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+	),
+
+	TP_printk("bucket %zu", __entry->bucket)
+);
+
+/* request.c */
+
 DEFINE_EVENT(bcache_request, bcache_request_start,
-
 	TP_PROTO(struct search *s, struct bio *bio),
-
 	TP_ARGS(s, bio)
 );
 
 DEFINE_EVENT(bcache_request, bcache_request_end,
-
 	TP_PROTO(struct search *s, struct bio *bio),
-
 	TP_ARGS(s, bio)
 );
 
 DECLARE_EVENT_CLASS(bcache_bio,
-
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio),
 
 	TP_STRUCT__entry(
@@ -70,7 +97,6 @@
 		__field(sector_t,	sector			)
 		__field(unsigned int,	nr_sector		)
 		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
 	),
 
 	TP_fast_assign(
@@ -78,191 +104,328 @@
 		__entry->sector		= bio->bi_sector;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
-	TP_printk("%d,%d  %s %llu + %u [%s]",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm)
+	TP_printk("%d,%d  %s %llu + %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector, __entry->nr_sector)
 );
 
-
-DEFINE_EVENT(bcache_bio, bcache_passthrough,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_sequential,
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_hit,
-
+DEFINE_EVENT(bcache_bio, bcache_bypass_congested,
 	TP_PROTO(struct bio *bio),
-
 	TP_ARGS(bio)
 );
 
-DEFINE_EVENT(bcache_bio, bcache_cache_miss,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_read_retry,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_writethrough,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_writeback,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_write_skip,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_btree_read,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_btree_write,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_write_dirty,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_read_dirty,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_write_moving,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_read_moving,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DEFINE_EVENT(bcache_bio, bcache_journal_write,
-
-	TP_PROTO(struct bio *bio),
-
-	TP_ARGS(bio)
-);
-
-DECLARE_EVENT_CLASS(bcache_cache_bio,
-
-	TP_PROTO(struct bio *bio,
-		 sector_t orig_sector,
-		 struct block_device* orig_bdev),
-
-	TP_ARGS(bio, orig_sector, orig_bdev),
+TRACE_EVENT(bcache_read,
+	TP_PROTO(struct bio *bio, bool hit, bool bypass),
+	TP_ARGS(bio, hit, bypass),
 
 	TP_STRUCT__entry(
 		__field(dev_t,		dev			)
-		__field(dev_t,		orig_dev		)
 		__field(sector_t,	sector			)
-		__field(sector_t,	orig_sector		)
 		__field(unsigned int,	nr_sector		)
 		__array(char,		rwbs,	6		)
-		__array(char,		comm,	TASK_COMM_LEN	)
+		__field(bool,		cache_hit		)
+		__field(bool,		bypass			)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= bio->bi_bdev->bd_dev;
-		__entry->orig_dev	= orig_bdev->bd_dev;
 		__entry->sector		= bio->bi_sector;
-		__entry->orig_sector	= orig_sector;
 		__entry->nr_sector	= bio->bi_size >> 9;
 		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
-		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+		__entry->cache_hit = hit;
+		__entry->bypass = bypass;
 	),
 
-	TP_printk("%d,%d  %s %llu + %u [%s] (from %d,%d %llu)",
+	TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->rwbs,
-		  (unsigned long long)__entry->sector,
-		  __entry->nr_sector, __entry->comm,
-		  MAJOR(__entry->orig_dev), MINOR(__entry->orig_dev),
-		  (unsigned long long)__entry->orig_sector)
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->cache_hit, __entry->bypass)
 );
 
-DEFINE_EVENT(bcache_cache_bio, bcache_cache_insert,
-
-	TP_PROTO(struct bio *bio,
-		 sector_t orig_sector,
-		 struct block_device *orig_bdev),
-
-	TP_ARGS(bio, orig_sector, orig_bdev)
-);
-
-DECLARE_EVENT_CLASS(bcache_gc,
-
-	TP_PROTO(uint8_t *uuid),
-
-	TP_ARGS(uuid),
+TRACE_EVENT(bcache_write,
+	TP_PROTO(struct bio *bio, bool writeback, bool bypass),
+	TP_ARGS(bio, writeback, bypass),
 
 	TP_STRUCT__entry(
-		__field(uint8_t *,	uuid)
+		__field(dev_t,		dev			)
+		__field(sector_t,	sector			)
+		__field(unsigned int,	nr_sector		)
+		__array(char,		rwbs,	6		)
+		__field(bool,		writeback		)
+		__field(bool,		bypass			)
 	),
 
 	TP_fast_assign(
-		__entry->uuid		= uuid;
+		__entry->dev		= bio->bi_bdev->bd_dev;
+		__entry->sector		= bio->bi_sector;
+		__entry->nr_sector	= bio->bi_size >> 9;
+		blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+		__entry->writeback = writeback;
+		__entry->bypass = bypass;
+	),
+
+	TP_printk("%d,%d  %s %llu + %u hit %u bypass %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->rwbs, (unsigned long long)__entry->sector,
+		  __entry->nr_sector, __entry->writeback, __entry->bypass)
+);
+
+DEFINE_EVENT(bcache_bio, bcache_read_retry,
+	TP_PROTO(struct bio *bio),
+	TP_ARGS(bio)
+);
+
+DEFINE_EVENT(bkey, bcache_cache_insert,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
+
+/* Journal */
+
+DECLARE_EVENT_CLASS(cache_set,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c),
+
+	TP_STRUCT__entry(
+		__array(char,		uuid,	16 )
+	),
+
+	TP_fast_assign(
+		memcpy(__entry->uuid, c->sb.set_uuid, 16);
 	),
 
 	TP_printk("%pU", __entry->uuid)
 );
 
-
-DEFINE_EVENT(bcache_gc, bcache_gc_start,
-
-	     TP_PROTO(uint8_t *uuid),
-
-	     TP_ARGS(uuid)
+DEFINE_EVENT(bkey, bcache_journal_replay_key,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
 );
 
-DEFINE_EVENT(bcache_gc, bcache_gc_end,
+DEFINE_EVENT(cache_set, bcache_journal_full,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
 
-	     TP_PROTO(uint8_t *uuid),
+DEFINE_EVENT(cache_set, bcache_journal_entry_full,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
 
-	     TP_ARGS(uuid)
+DEFINE_EVENT(bcache_bio, bcache_journal_write,
+	TP_PROTO(struct bio *bio),
+	TP_ARGS(bio)
+);
+
+/* Btree */
+
+DEFINE_EVENT(cache_set, bcache_btree_cache_cannibalize,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_read,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+TRACE_EVENT(bcache_btree_write,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b),
+
+	TP_STRUCT__entry(
+		__field(size_t,		bucket			)
+		__field(unsigned,	block			)
+		__field(unsigned,	keys			)
+	),
+
+	TP_fast_assign(
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->block	= b->written;
+		__entry->keys	= b->sets[b->nsets].data->keys;
+	),
+
+	TP_printk("bucket %zu", __entry->bucket)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_node_free,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+TRACE_EVENT(bcache_btree_gc_coalesce,
+	TP_PROTO(unsigned nodes),
+	TP_ARGS(nodes),
+
+	TP_STRUCT__entry(
+		__field(unsigned,	nodes			)
+	),
+
+	TP_fast_assign(
+		__entry->nodes	= nodes;
+	),
+
+	TP_printk("coalesced %u nodes", __entry->nodes)
+);
+
+DEFINE_EVENT(cache_set, bcache_gc_start,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
+
+DEFINE_EVENT(cache_set, bcache_gc_end,
+	TP_PROTO(struct cache_set *c),
+	TP_ARGS(c)
+);
+
+DEFINE_EVENT(bkey, bcache_gc_copy,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
+
+DEFINE_EVENT(bkey, bcache_gc_copy_collision,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
+
+TRACE_EVENT(bcache_btree_insert_key,
+	TP_PROTO(struct btree *b, struct bkey *k, unsigned op, unsigned status),
+	TP_ARGS(b, k, op, status),
+
+	TP_STRUCT__entry(
+		__field(u64,	btree_node			)
+		__field(u32,	btree_level			)
+		__field(u32,	inode				)
+		__field(u64,	offset				)
+		__field(u32,	size				)
+		__field(u8,	dirty				)
+		__field(u8,	op				)
+		__field(u8,	status				)
+	),
+
+	TP_fast_assign(
+		__entry->btree_node = PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->btree_level = b->level;
+		__entry->inode	= KEY_INODE(k);
+		__entry->offset	= KEY_OFFSET(k);
+		__entry->size	= KEY_SIZE(k);
+		__entry->dirty	= KEY_DIRTY(k);
+		__entry->op = op;
+		__entry->status = status;
+	),
+
+	TP_printk("%u for %u at %llu(%u): %u:%llu len %u dirty %u",
+		  __entry->status, __entry->op,
+		  __entry->btree_node, __entry->btree_level,
+		  __entry->inode, __entry->offset,
+		  __entry->size, __entry->dirty)
+);
+
+DECLARE_EVENT_CLASS(btree_split,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys),
+
+	TP_STRUCT__entry(
+		__field(size_t,		bucket			)
+		__field(unsigned,	keys			)
+	),
+
+	TP_fast_assign(
+		__entry->bucket	= PTR_BUCKET_NR(b->c, &b->key, 0);
+		__entry->keys	= keys;
+	),
+
+	TP_printk("bucket %zu keys %u", __entry->bucket, __entry->keys)
+);
+
+DEFINE_EVENT(btree_split, bcache_btree_node_split,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys)
+);
+
+DEFINE_EVENT(btree_split, bcache_btree_node_compact,
+	TP_PROTO(struct btree *b, unsigned keys),
+	TP_ARGS(b, keys)
+);
+
+DEFINE_EVENT(btree_node, bcache_btree_set_root,
+	TP_PROTO(struct btree *b),
+	TP_ARGS(b)
+);
+
+/* Allocator */
+
+TRACE_EVENT(bcache_alloc_invalidate,
+	TP_PROTO(struct cache *ca),
+	TP_ARGS(ca),
+
+	TP_STRUCT__entry(
+		__field(unsigned,	free			)
+		__field(unsigned,	free_inc		)
+		__field(unsigned,	free_inc_size		)
+		__field(unsigned,	unused			)
+	),
+
+	TP_fast_assign(
+		__entry->free		= fifo_used(&ca->free);
+		__entry->free_inc	= fifo_used(&ca->free_inc);
+		__entry->free_inc_size	= ca->free_inc.size;
+		__entry->unused		= fifo_used(&ca->unused);
+	),
+
+	TP_printk("free %u free_inc %u/%u unused %u", __entry->free,
+		  __entry->free_inc, __entry->free_inc_size, __entry->unused)
+);
+
+TRACE_EVENT(bcache_alloc_fail,
+	TP_PROTO(struct cache *ca),
+	TP_ARGS(ca),
+
+	TP_STRUCT__entry(
+		__field(unsigned,	free			)
+		__field(unsigned,	free_inc		)
+		__field(unsigned,	unused			)
+		__field(unsigned,	blocked			)
+	),
+
+	TP_fast_assign(
+		__entry->free		= fifo_used(&ca->free);
+		__entry->free_inc	= fifo_used(&ca->free_inc);
+		__entry->unused		= fifo_used(&ca->unused);
+		__entry->blocked	= atomic_read(&ca->set->prio_blocked);
+	),
+
+	TP_printk("free %u free_inc %u unused %u blocked %u", __entry->free,
+		  __entry->free_inc, __entry->unused, __entry->blocked)
+);
+
+/* Background writeback */
+
+DEFINE_EVENT(bkey, bcache_writeback,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
+);
+
+DEFINE_EVENT(bkey, bcache_writeback_collision,
+	TP_PROTO(struct bkey *k),
+	TP_ARGS(k)
 );
 
 #endif /* _TRACE_BCACHE_H */
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index d615f78..41a6643 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -670,10 +670,6 @@
 			     sizeof(u64));				\
 	__entry_size -= sizeof(u32);					\
 									\
-	if (WARN_ONCE(__entry_size > PERF_MAX_TRACE_SIZE,		\
-		      "profile buffer not large enough"))		\
-		return;							\
-									\
 	entry = (struct ftrace_raw_##call *)perf_trace_buf_prepare(	\
 		__entry_size, event_call->event.type, &__regs, &rctx);	\
 	if (!entry)							\
diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
index 119487e..2d9a25d 100644
--- a/include/uapi/drm/Kbuild
+++ b/include/uapi/drm/Kbuild
@@ -16,3 +16,4 @@
 header-y += tegra_drm.h
 header-y += via_drm.h
 header-y += vmwgfx_drm.h
+header-y += msm_drm.h
diff --git a/include/uapi/drm/drm.h b/include/uapi/drm/drm.h
index 238a166..ece8678 100644
--- a/include/uapi/drm/drm.h
+++ b/include/uapi/drm/drm.h
@@ -181,7 +181,7 @@
 	_DRM_AGP = 3,		  /**< AGP/GART */
 	_DRM_SCATTER_GATHER = 4,  /**< Scatter/gather memory for PCI DMA */
 	_DRM_CONSISTENT = 5,	  /**< Consistent memory for PCI DMA */
-	_DRM_GEM = 6,		  /**< GEM object */
+	_DRM_GEM = 6,		  /**< GEM object (obsolete) */
 };
 
 /**
@@ -780,6 +780,7 @@
 #define DRM_CAP_DUMB_PREFER_SHADOW 0x4
 #define DRM_CAP_PRIME 0x5
 #define DRM_CAP_TIMESTAMP_MONOTONIC 0x6
+#define DRM_CAP_ASYNC_PAGE_FLIP 0x7
 
 #define DRM_PRIME_CAP_IMPORT 0x1
 #define DRM_PRIME_CAP_EXPORT 0x2
diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h
index 53db7ce..5508117 100644
--- a/include/uapi/drm/drm_mode.h
+++ b/include/uapi/drm/drm_mode.h
@@ -412,7 +412,8 @@
 };
 
 #define DRM_MODE_PAGE_FLIP_EVENT 0x01
-#define DRM_MODE_PAGE_FLIP_FLAGS DRM_MODE_PAGE_FLIP_EVENT
+#define DRM_MODE_PAGE_FLIP_ASYNC 0x02
+#define DRM_MODE_PAGE_FLIP_FLAGS (DRM_MODE_PAGE_FLIP_EVENT|DRM_MODE_PAGE_FLIP_ASYNC)
 
 /*
  * Request a page flip on the specified crtc.
@@ -426,11 +427,14 @@
  * flip is already pending as the ioctl is called, EBUSY will be
  * returned.
  *
- * The ioctl supports one flag, DRM_MODE_PAGE_FLIP_EVENT, which will
- * request that drm sends back a vblank event (see drm.h: struct
- * drm_event_vblank) when the page flip is done.  The user_data field
- * passed in with this ioctl will be returned as the user_data field
- * in the vblank event struct.
+ * Flag DRM_MODE_PAGE_FLIP_EVENT requests that drm sends back a vblank
+ * event (see drm.h: struct drm_event_vblank) when the page flip is
+ * done.  The user_data field passed in with this ioctl will be
+ * returned as the user_data field in the vblank event struct.
+ *
+ * Flag DRM_MODE_PAGE_FLIP_ASYNC requests that the flip happen
+ * 'as soon as possible', meaning that it not delay waiting for vblank.
+ * This may cause tearing on the screen.
  *
  * The reserved field must be zero until we figure out something
  * clever to use it for.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 923ed7f..55bb572 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -33,6 +33,30 @@
  * subject to backwards-compatibility constraints.
  */
 
+/**
+ * DOC: uevents generated by i915 on it's device node
+ *
+ * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch
+ *	event from the gpu l3 cache. Additional information supplied is ROW,
+ *	BANK, SUBBANK of the affected cacheline. Userspace should keep track of
+ *	these events and if a specific cache-line seems to have a persistent
+ *	error remap it with the l3 remapping tool supplied in intel-gpu-tools.
+ *	The value supplied with the event is always 1.
+ *
+ * I915_ERROR_UEVENT - Generated upon error detection, currently only via
+ *	hangcheck. The error detection event is a good indicator of when things
+ *	began to go badly. The value supplied with the event is a 1 upon error
+ *	detection, and a 0 upon reset completion, signifying no more error
+ *	exists. NOTE: Disabling hangcheck or reset via module parameter will
+ *	cause the related events to not be seen.
+ *
+ * I915_RESET_UEVENT - Event is generated just before an attempt to reset the
+ *	the GPU. The value supplied with the event is always 1. NOTE: Disable
+ *	reset via module parameter will cause this event to not be seen.
+ */
+#define I915_L3_PARITY_UEVENT		"L3_PARITY_ERROR"
+#define I915_ERROR_UEVENT		"ERROR"
+#define I915_RESET_UEVENT		"RESET"
 
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
@@ -310,6 +334,7 @@
 #define I915_PARAM_HAS_PINNED_BATCHES	 24
 #define I915_PARAM_HAS_EXEC_NO_RELOC	 25
 #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
+#define I915_PARAM_HAS_WT     	 	 27
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -744,8 +769,32 @@
 	__u32 busy;
 };
 
+/**
+ * I915_CACHING_NONE
+ *
+ * GPU access is not coherent with cpu caches. Default for machines without an
+ * LLC.
+ */
 #define I915_CACHING_NONE		0
+/**
+ * I915_CACHING_CACHED
+ *
+ * GPU access is coherent with cpu caches and furthermore the data is cached in
+ * last-level caches shared between cpu cores and the gpu GT. Default on
+ * machines with HAS_LLC.
+ */
 #define I915_CACHING_CACHED		1
+/**
+ * I915_CACHING_DISPLAY
+ *
+ * Special GPU caching mode which is coherent with the scanout engines.
+ * Transparently falls back to I915_CACHING_NONE on platforms where no special
+ * cache mode (like write-through or gfdt flushing) is available. The kernel
+ * automatically sets this mode when using a buffer as a scanout target.
+ * Userspace can manually set this mode to avoid a costly stall and clflush in
+ * the hotpath of drawing the first frame.
+ */
+#define I915_CACHING_DISPLAY		2
 
 struct drm_i915_gem_caching {
 	/**
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
new file mode 100644
index 0000000..d3c6207
--- /dev/null
+++ b/include/uapi/drm/msm_drm.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (C) 2013 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MSM_DRM_H__
+#define __MSM_DRM_H__
+
+#include <stddef.h>
+#include <drm/drm.h>
+
+/* Please note that modifications to all structs defined here are
+ * subject to backwards-compatibility constraints:
+ *  1) Do not use pointers, use uint64_t instead for 32 bit / 64 bit
+ *     user/kernel compatibility
+ *  2) Keep fields aligned to their size
+ *  3) Because of how drm_ioctl() works, we can add new fields at
+ *     the end of an ioctl if some care is taken: drm_ioctl() will
+ *     zero out the new fields at the tail of the ioctl, so a zero
+ *     value should have a backwards compatible meaning.  And for
+ *     output params, userspace won't see the newly added output
+ *     fields.. so that has to be somehow ok.
+ */
+
+#define MSM_PIPE_NONE        0x00
+#define MSM_PIPE_2D0         0x01
+#define MSM_PIPE_2D1         0x02
+#define MSM_PIPE_3D0         0x10
+
+/* timeouts are specified in clock-monotonic absolute times (to simplify
+ * restarting interrupted ioctls).  The following struct is logically the
+ * same as 'struct timespec' but 32/64b ABI safe.
+ */
+struct drm_msm_timespec {
+	int64_t tv_sec;          /* seconds */
+	int64_t tv_nsec;         /* nanoseconds */
+};
+
+#define MSM_PARAM_GPU_ID     0x01
+#define MSM_PARAM_GMEM_SIZE  0x02
+
+struct drm_msm_param {
+	uint32_t pipe;           /* in, MSM_PIPE_x */
+	uint32_t param;          /* in, MSM_PARAM_x */
+	uint64_t value;          /* out (get_param) or in (set_param) */
+};
+
+/*
+ * GEM buffers:
+ */
+
+#define MSM_BO_SCANOUT       0x00000001     /* scanout capable */
+#define MSM_BO_GPU_READONLY  0x00000002
+#define MSM_BO_CACHE_MASK    0x000f0000
+/* cache modes */
+#define MSM_BO_CACHED        0x00010000
+#define MSM_BO_WC            0x00020000
+#define MSM_BO_UNCACHED      0x00040000
+
+struct drm_msm_gem_new {
+	uint64_t size;           /* in */
+	uint32_t flags;          /* in, mask of MSM_BO_x */
+	uint32_t handle;         /* out */
+};
+
+struct drm_msm_gem_info {
+	uint32_t handle;         /* in */
+	uint32_t pad;
+	uint64_t offset;         /* out, offset to pass to mmap() */
+};
+
+#define MSM_PREP_READ        0x01
+#define MSM_PREP_WRITE       0x02
+#define MSM_PREP_NOSYNC      0x04
+
+struct drm_msm_gem_cpu_prep {
+	uint32_t handle;         /* in */
+	uint32_t op;             /* in, mask of MSM_PREP_x */
+	struct drm_msm_timespec timeout;   /* in */
+};
+
+struct drm_msm_gem_cpu_fini {
+	uint32_t handle;         /* in */
+};
+
+/*
+ * Cmdstream Submission:
+ */
+
+/* The value written into the cmdstream is logically:
+ *
+ *   ((relocbuf->gpuaddr + reloc_offset) << shift) | or
+ *
+ * When we have GPU's w/ >32bit ptrs, it should be possible to deal
+ * with this by emit'ing two reloc entries with appropriate shift
+ * values.  Or a new MSM_SUBMIT_CMD_x type would also be an option.
+ *
+ * NOTE that reloc's must be sorted by order of increasing submit_offset,
+ * otherwise EINVAL.
+ */
+struct drm_msm_gem_submit_reloc {
+	uint32_t submit_offset;  /* in, offset from submit_bo */
+	uint32_t or;             /* in, value OR'd with result */
+	int32_t  shift;          /* in, amount of left shift (can be negative) */
+	uint32_t reloc_idx;      /* in, index of reloc_bo buffer */
+	uint64_t reloc_offset;   /* in, offset from start of reloc_bo */
+};
+
+/* submit-types:
+ *   BUF - this cmd buffer is executed normally.
+ *   IB_TARGET_BUF - this cmd buffer is an IB target.  Reloc's are
+ *      processed normally, but the kernel does not setup an IB to
+ *      this buffer in the first-level ringbuffer
+ *   CTX_RESTORE_BUF - only executed if there has been a GPU context
+ *      switch since the last SUBMIT ioctl
+ */
+#define MSM_SUBMIT_CMD_BUF             0x0001
+#define MSM_SUBMIT_CMD_IB_TARGET_BUF   0x0002
+#define MSM_SUBMIT_CMD_CTX_RESTORE_BUF 0x0003
+struct drm_msm_gem_submit_cmd {
+	uint32_t type;           /* in, one of MSM_SUBMIT_CMD_x */
+	uint32_t submit_idx;     /* in, index of submit_bo cmdstream buffer */
+	uint32_t submit_offset;  /* in, offset into submit_bo */
+	uint32_t size;           /* in, cmdstream size */
+	uint32_t pad;
+	uint32_t nr_relocs;      /* in, number of submit_reloc's */
+	uint64_t __user relocs;  /* in, ptr to array of submit_reloc's */
+};
+
+/* Each buffer referenced elsewhere in the cmdstream submit (ie. the
+ * cmdstream buffer(s) themselves or reloc entries) has one (and only
+ * one) entry in the submit->bos[] table.
+ *
+ * As a optimization, the current buffer (gpu virtual address) can be
+ * passed back through the 'presumed' field.  If on a subsequent reloc,
+ * userspace passes back a 'presumed' address that is still valid,
+ * then patching the cmdstream for this entry is skipped.  This can
+ * avoid kernel needing to map/access the cmdstream bo in the common
+ * case.
+ */
+#define MSM_SUBMIT_BO_READ             0x0001
+#define MSM_SUBMIT_BO_WRITE            0x0002
+struct drm_msm_gem_submit_bo {
+	uint32_t flags;          /* in, mask of MSM_SUBMIT_BO_x */
+	uint32_t handle;         /* in, GEM handle */
+	uint64_t presumed;       /* in/out, presumed buffer address */
+};
+
+/* Each cmdstream submit consists of a table of buffers involved, and
+ * one or more cmdstream buffers.  This allows for conditional execution
+ * (context-restore), and IB buffers needed for per tile/bin draw cmds.
+ */
+struct drm_msm_gem_submit {
+	uint32_t pipe;           /* in, MSM_PIPE_x */
+	uint32_t fence;          /* out */
+	uint32_t nr_bos;         /* in, number of submit_bo's */
+	uint32_t nr_cmds;        /* in, number of submit_cmd's */
+	uint64_t __user bos;     /* in, ptr to array of submit_bo's */
+	uint64_t __user cmds;    /* in, ptr to array of submit_cmd's */
+};
+
+/* The normal way to synchronize with the GPU is just to CPU_PREP on
+ * a buffer if you need to access it from the CPU (other cmdstream
+ * submission from same or other contexts, PAGE_FLIP ioctl, etc, all
+ * handle the required synchronization under the hood).  This ioctl
+ * mainly just exists as a way to implement the gallium pipe_fence
+ * APIs without requiring a dummy bo to synchronize on.
+ */
+struct drm_msm_wait_fence {
+	uint32_t fence;          /* in */
+	uint32_t pad;
+	struct drm_msm_timespec timeout;   /* in */
+};
+
+#define DRM_MSM_GET_PARAM              0x00
+/* placeholder:
+#define DRM_MSM_SET_PARAM              0x01
+ */
+#define DRM_MSM_GEM_NEW                0x02
+#define DRM_MSM_GEM_INFO               0x03
+#define DRM_MSM_GEM_CPU_PREP           0x04
+#define DRM_MSM_GEM_CPU_FINI           0x05
+#define DRM_MSM_GEM_SUBMIT             0x06
+#define DRM_MSM_WAIT_FENCE             0x07
+#define DRM_MSM_NUM_IOCTLS             0x08
+
+#define DRM_IOCTL_MSM_GET_PARAM        DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GET_PARAM, struct drm_msm_param)
+#define DRM_IOCTL_MSM_GEM_NEW          DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_NEW, struct drm_msm_gem_new)
+#define DRM_IOCTL_MSM_GEM_INFO         DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_INFO, struct drm_msm_gem_info)
+#define DRM_IOCTL_MSM_GEM_CPU_PREP     DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_PREP, struct drm_msm_gem_cpu_prep)
+#define DRM_IOCTL_MSM_GEM_CPU_FINI     DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_GEM_CPU_FINI, struct drm_msm_gem_cpu_fini)
+#define DRM_IOCTL_MSM_GEM_SUBMIT       DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_GEM_SUBMIT, struct drm_msm_gem_submit)
+#define DRM_IOCTL_MSM_WAIT_FENCE       DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, struct drm_msm_wait_fence)
+
+#endif /* __MSM_DRM_H__ */
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index 321d4ac..fa8b3ad 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -979,6 +979,8 @@
 #define RADEON_INFO_RING_WORKING	0x15
 /* SI tile mode array */
 #define RADEON_INFO_SI_TILE_MODE_ARRAY	0x16
+/* query if CP DMA is supported on the compute ring */
+#define RADEON_INFO_SI_CP_DMA_COMPUTE	0x17
 
 
 struct drm_radeon_info {
diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h
index d500369..1db453e 100644
--- a/include/uapi/linux/firewire-cdev.h
+++ b/include/uapi/linux/firewire-cdev.h
@@ -215,8 +215,8 @@
  * with the %FW_CDEV_ISO_INTERRUPT bit set, when explicitly requested with
  * %FW_CDEV_IOC_FLUSH_ISO, or when there have been so many completed packets
  * without the interrupt bit set that the kernel's internal buffer for @header
- * is about to overflow.  (In the last case, kernels with ABI version < 5 drop
- * header data up to the next interrupt packet.)
+ * is about to overflow.  (In the last case, ABI versions < 5 drop header data
+ * up to the next interrupt packet.)
  *
  * Isochronous transmit events (context type %FW_CDEV_ISO_CONTEXT_TRANSMIT):
  *
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index 6cf06bf..2fee45b 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -133,4 +133,38 @@
 	__u8 reserved;
 };
 
+/* index values for the variables in ipv4_devconf */
+enum
+{
+	IPV4_DEVCONF_FORWARDING=1,
+	IPV4_DEVCONF_MC_FORWARDING,
+	IPV4_DEVCONF_PROXY_ARP,
+	IPV4_DEVCONF_ACCEPT_REDIRECTS,
+	IPV4_DEVCONF_SECURE_REDIRECTS,
+	IPV4_DEVCONF_SEND_REDIRECTS,
+	IPV4_DEVCONF_SHARED_MEDIA,
+	IPV4_DEVCONF_RP_FILTER,
+	IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE,
+	IPV4_DEVCONF_BOOTP_RELAY,
+	IPV4_DEVCONF_LOG_MARTIANS,
+	IPV4_DEVCONF_TAG,
+	IPV4_DEVCONF_ARPFILTER,
+	IPV4_DEVCONF_MEDIUM_ID,
+	IPV4_DEVCONF_NOXFRM,
+	IPV4_DEVCONF_NOPOLICY,
+	IPV4_DEVCONF_FORCE_IGMP_VERSION,
+	IPV4_DEVCONF_ARP_ANNOUNCE,
+	IPV4_DEVCONF_ARP_IGNORE,
+	IPV4_DEVCONF_PROMOTE_SECONDARIES,
+	IPV4_DEVCONF_ARP_ACCEPT,
+	IPV4_DEVCONF_ARP_NOTIFY,
+	IPV4_DEVCONF_ACCEPT_LOCAL,
+	IPV4_DEVCONF_SRC_VMARK,
+	IPV4_DEVCONF_PROXY_ARP_PVLAN,
+	IPV4_DEVCONF_ROUTE_LOCALNET,
+	__IPV4_DEVCONF_MAX
+};
+
+#define IPV4_DEVCONF_MAX (__IPV4_DEVCONF_MAX - 1)
+
 #endif /* _UAPI_LINUX_IP_H */
diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index caed0f3..8137dd8 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -69,8 +69,8 @@
  *	starting a poll from a device which has a secure element enabled means
  *	we want to do SE based card emulation.
  * @NFC_CMD_DISABLE_SE: Disable the physical link to a specific secure element.
- * @NFC_CMD_FW_UPLOAD: Request to Load/flash firmware, or event to inform that
- *	some firmware was loaded
+ * @NFC_CMD_FW_DOWNLOAD: Request to Load/flash firmware, or event to inform
+ *	that some firmware was loaded
  */
 enum nfc_commands {
 	NFC_CMD_UNSPEC,
@@ -94,7 +94,7 @@
 	NFC_CMD_DISABLE_SE,
 	NFC_CMD_LLC_SDREQ,
 	NFC_EVENT_LLC_SDRES,
-	NFC_CMD_FW_UPLOAD,
+	NFC_CMD_FW_DOWNLOAD,
 	NFC_EVENT_SE_ADDED,
 	NFC_EVENT_SE_REMOVED,
 /* private: internal use only */
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index dbd71b0..09d62b92 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -73,9 +73,17 @@
 #define TC_H_ROOT	(0xFFFFFFFFU)
 #define TC_H_INGRESS    (0xFFFFFFF1U)
 
+/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
+enum tc_link_layer {
+	TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
+	TC_LINKLAYER_ETHERNET,
+	TC_LINKLAYER_ATM,
+};
+#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
+
 struct tc_ratespec {
 	unsigned char	cell_log;
-	unsigned char	__reserved;
+	__u8		linklayer; /* lower 4 bits */
 	unsigned short	overhead;
 	short		cell_align;
 	unsigned short	mpu;
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index af0a674..a1356d3 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -253,7 +253,7 @@
 	LINUX_MIB_TCPFASTOPENLISTENOVERFLOW,	/* TCPFastOpenListenOverflow */
 	LINUX_MIB_TCPFASTOPENCOOKIEREQD,	/* TCPFastOpenCookieReqd */
 	LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES, /* TCPSpuriousRtxHostQueues */
-	LINUX_MIB_LOWLATENCYRXPACKETS,		/* LowLatencyRxPackets */
+	LINUX_MIB_BUSYPOLLRXPACKETS,		/* BusyPollRxPackets */
 	__LINUX_MIB_MAX
 };
 
diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h
index 7692dc6..331499d 100644
--- a/include/uapi/linux/usb/ch11.h
+++ b/include/uapi/linux/usb/ch11.h
@@ -11,6 +11,17 @@
 
 #include <linux/types.h>	/* __u8 etc */
 
+/* This is arbitrary.
+ * From USB 2.0 spec Table 11-13, offset 7, a hub can
+ * have up to 255 ports. The most yet reported is 10.
+ *
+ * Current Wireless USB host hardware (Intel i1480 for example) allows
+ * up to 22 devices to connect. Upcoming hardware might raise that
+ * limit. Because the arrays need to add a bit for hub status data, we
+ * use 31, so plus one evens out to four bytes.
+ */
+#define USB_MAXCHILDREN		31
+
 /*
  * Hub request types
  */
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ffd4652..65e1209 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -103,12 +103,46 @@
 #define BLKIF_OP_DISCARD           5
 
 /*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * This pages are filled with an array of blkif_request_segment_aligned
+ * that hold the information about the segments. The number of indirect
+ * pages to use is determined by the maximum number of segments
+ * a indirect request contains. Every indirect page can contain a maximum
+ * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)),
+ * so to calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments/512).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
+/*
  * Maximum scatter/gather segments per request.
  * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
  * NB. This could be 12 if the ring indexes weren't stored in the same page.
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+struct blkif_request_segment_aligned {
+	grant_ref_t gref;        /* reference to I/O buffer frame        */
+	/* @first_sect: first sector in frame to transfer (inclusive).   */
+	/* @last_sect: last sector in frame to transfer (inclusive).     */
+	uint8_t     first_sect, last_sect;
+	uint16_t    _pad; /* padding to make it 8 bytes, so it's cache-aligned */
+} __attribute__((__packed__));
+
 struct blkif_request_rw {
 	uint8_t        nr_segments;  /* number of segments                   */
 	blkif_vdev_t   handle;       /* only for read/write requests         */
@@ -147,12 +181,31 @@
 	uint64_t     id;           /* private guest value, echoed in resp  */
 } __attribute__((__packed__));
 
+struct blkif_request_indirect {
+	uint8_t        indirect_op;
+	uint16_t       nr_segments;
+#ifdef CONFIG_X86_64
+	uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
+#endif
+	uint64_t       id;
+	blkif_sector_t sector_number;
+	blkif_vdev_t   handle;
+	uint16_t       _pad2;
+	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef CONFIG_X86_64
+	uint32_t      _pad3;         /* make it 64 byte aligned */
+#else
+	uint64_t      _pad3;         /* make it 64 byte aligned */
+#endif
+} __attribute__((__packed__));
+
 struct blkif_request {
 	uint8_t        operation;    /* BLKIF_OP_???                         */
 	union {
 		struct blkif_request_rw rw;
 		struct blkif_request_discard discard;
 		struct blkif_request_other other;
+		struct blkif_request_indirect indirect;
 	} u;
 } __attribute__((__packed__));
 
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index 75271b9..7d28aff 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -188,6 +188,11 @@
 #define RING_REQUEST_CONS_OVERFLOW(_r, _cons)				\
     (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
 
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)               \
+    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+
 #define RING_PUSH_REQUESTS(_r) do {					\
     wmb(); /* back sees requests /before/ updated producer index */	\
     (_r)->sring->req_prod = (_r)->req_prod_pvt;				\
diff --git a/init/Kconfig b/init/Kconfig
index 247084b..fed81b5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -955,7 +955,7 @@
 	  Memory Resource Controller Swap Extension comes with its price in
 	  a bigger memory consumption. General purpose distribution kernels
 	  which want to enable the feature but keep it disabled by default
-	  and let the user enable it by swapaccount boot command line
+	  and let the user enable it by swapaccount=1 boot command line
 	  parameter should have this option unselected.
 	  For those who want to have the feature enabled by default should
 	  select this option (if, for some reason, they need to disable it
diff --git a/kernel/Makefile b/kernel/Makefile
index 470839d..35ef118 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-obj-y     = fork.o exec_domain.o panic.o printk.o \
+obj-y     = fork.o exec_domain.o panic.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
 	    signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
@@ -24,6 +24,7 @@
 
 obj-y += sched/
 obj-y += power/
+obj-y += printk/
 obj-y += cpu/
 
 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0e0b20b..781845a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1845,36 +1845,43 @@
 EXPORT_SYMBOL_GPL(cgroup_path);
 
 /**
- * task_cgroup_path_from_hierarchy - cgroup path of a task on a hierarchy
+ * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
  * @task: target task
- * @hierarchy_id: the hierarchy to look up @task's cgroup from
  * @buf: the buffer to write the path into
  * @buflen: the length of the buffer
  *
- * Determine @task's cgroup on the hierarchy specified by @hierarchy_id and
- * copy its path into @buf.  This function grabs cgroup_mutex and shouldn't
- * be used inside locks used by cgroup controller callbacks.
+ * Determine @task's cgroup on the first (the one with the lowest non-zero
+ * hierarchy_id) cgroup hierarchy and copy its path into @buf.  This
+ * function grabs cgroup_mutex and shouldn't be used inside locks used by
+ * cgroup controller callbacks.
+ *
+ * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short.
  */
-int task_cgroup_path_from_hierarchy(struct task_struct *task, int hierarchy_id,
-				    char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
 {
 	struct cgroupfs_root *root;
-	struct cgroup *cgrp = NULL;
-	int ret = -ENOENT;
+	struct cgroup *cgrp;
+	int hierarchy_id = 1, ret = 0;
+
+	if (buflen < 2)
+		return -ENAMETOOLONG;
 
 	mutex_lock(&cgroup_mutex);
 
-	root = idr_find(&cgroup_hierarchy_idr, hierarchy_id);
+	root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
+
 	if (root) {
 		cgrp = task_cgroup_from_root(task, root);
 		ret = cgroup_path(cgrp, buf, buflen);
+	} else {
+		/* if no hierarchy exists, everyone is in "/" */
+		memcpy(buf, "/", 2);
 	}
 
 	mutex_unlock(&cgroup_mutex);
-
 	return ret;
 }
-EXPORT_SYMBOL_GPL(task_cgroup_path_from_hierarchy);
+EXPORT_SYMBOL_GPL(task_cgroup_path);
 
 /*
  * Control Group taskset
@@ -4328,8 +4335,10 @@
 		}
 
 		err = percpu_ref_init(&css->refcnt, css_release);
-		if (err)
+		if (err) {
+			ss->css_free(cgrp);
 			goto err_free_all;
+		}
 
 		init_cgroup_css(css, ss, cgrp);
 
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e565778..ea1966d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -475,13 +475,17 @@
 
 	/*
 	 * Cpusets with tasks - existing or newly being attached - can't
-	 * have empty cpus_allowed or mems_allowed.
+	 * be changed to have empty cpus_allowed or mems_allowed.
 	 */
 	ret = -ENOSPC;
-	if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress) &&
-	    (cpumask_empty(trial->cpus_allowed) &&
-	     nodes_empty(trial->mems_allowed)))
-		goto out;
+	if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress)) {
+		if (!cpumask_empty(cur->cpus_allowed) &&
+		    cpumask_empty(trial->cpus_allowed))
+			goto out;
+		if (!nodes_empty(cur->mems_allowed) &&
+		    nodes_empty(trial->mems_allowed))
+			goto out;
+	}
 
 	ret = 0;
 out:
@@ -1608,11 +1612,13 @@
 {
 	struct cpuset *cs = cgroup_cs(cgrp);
 	cpuset_filetype_t type = cft->private;
-	int retval = -ENODEV;
+	int retval = 0;
 
 	mutex_lock(&cpuset_mutex);
-	if (!is_cpuset_online(cs))
+	if (!is_cpuset_online(cs)) {
+		retval = -ENODEV;
 		goto out_unlock;
+	}
 
 	switch (type) {
 	case FILE_CPU_EXCLUSIVE:
diff --git a/kernel/fork.c b/kernel/fork.c
index 403d2bb..e23bb19 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1679,6 +1679,12 @@
 		 int __user *, parent_tidptr,
 		 int __user *, child_tidptr,
 		 int, tls_val)
+#elif defined(CONFIG_CLONE_BACKWARDS3)
+SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
+		int, stack_size,
+		int __user *, parent_tidptr,
+		int __user *, child_tidptr,
+		int, tls_val)
 #else
 SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		 int __user *, parent_tidptr,
diff --git a/kernel/freezer.c b/kernel/freezer.c
index 8b2afc1..b462fa1 100644
--- a/kernel/freezer.c
+++ b/kernel/freezer.c
@@ -33,7 +33,7 @@
  */
 bool freezing_slow_path(struct task_struct *p)
 {
-	if (p->flags & PF_NOFREEZE)
+	if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK))
 		return false;
 
 	if (pm_nosig_freezing || cgroup_freezing(p))
diff --git a/kernel/mutex.c b/kernel/mutex.c
index ff05f4b..a52ee7bb 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -686,7 +686,7 @@
 	might_sleep();
 	ret =  __mutex_lock_common(&lock->base, TASK_UNINTERRUPTIBLE,
 				   0, &ctx->dep_map, _RET_IP_, ctx);
-	if (!ret && ctx->acquired > 0)
+	if (!ret && ctx->acquired > 1)
 		return ww_mutex_deadlock_injection(lock, ctx);
 
 	return ret;
@@ -702,7 +702,7 @@
 	ret = __mutex_lock_common(&lock->base, TASK_INTERRUPTIBLE,
 				  0, &ctx->dep_map, _RET_IP_, ctx);
 
-	if (!ret && ctx->acquired > 0)
+	if (!ret && ctx->acquired > 1)
 		return ww_mutex_deadlock_injection(lock, ctx);
 
 	return ret;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index fc0df84..06ec8869 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -109,6 +109,8 @@
 
 /**
  * freeze_processes - Signal user space processes to enter the refrigerator.
+ * The current thread will not be frozen.  The same process that calls
+ * freeze_processes must later call thaw_processes.
  *
  * On success, returns 0.  On failure, -errno and system is fully thawed.
  */
@@ -120,6 +122,9 @@
 	if (error)
 		return error;
 
+	/* Make sure this task doesn't get frozen */
+	current->flags |= PF_SUSPEND_TASK;
+
 	if (!pm_freezing)
 		atomic_inc(&system_freezing_cnt);
 
@@ -168,6 +173,7 @@
 void thaw_processes(void)
 {
 	struct task_struct *g, *p;
+	struct task_struct *curr = current;
 
 	if (pm_freezing)
 		atomic_dec(&system_freezing_cnt);
@@ -182,10 +188,15 @@
 
 	read_lock(&tasklist_lock);
 	do_each_thread(g, p) {
+		/* No other threads should have PF_SUSPEND_TASK set */
+		WARN_ON((p != curr) && (p->flags & PF_SUSPEND_TASK));
 		__thaw_task(p);
 	} while_each_thread(g, p);
 	read_unlock(&tasklist_lock);
 
+	WARN_ON(!(curr->flags & PF_SUSPEND_TASK));
+	curr->flags &= ~PF_SUSPEND_TASK;
+
 	usermodehelper_enable();
 
 	schedule();
diff --git a/kernel/power/qos.c b/kernel/power/qos.c
index 06fe285..a394297 100644
--- a/kernel/power/qos.c
+++ b/kernel/power/qos.c
@@ -296,6 +296,17 @@
 }
 EXPORT_SYMBOL_GPL(pm_qos_request_active);
 
+static void __pm_qos_update_request(struct pm_qos_request *req,
+			   s32 new_value)
+{
+	trace_pm_qos_update_request(req->pm_qos_class, new_value);
+
+	if (new_value != req->node.prio)
+		pm_qos_update_target(
+			pm_qos_array[req->pm_qos_class]->constraints,
+			&req->node, PM_QOS_UPDATE_REQ, new_value);
+}
+
 /**
  * pm_qos_work_fn - the timeout handler of pm_qos_update_request_timeout
  * @work: work struct for the delayed work (timeout)
@@ -308,7 +319,7 @@
 						  struct pm_qos_request,
 						  work);
 
-	pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
+	__pm_qos_update_request(req, PM_QOS_DEFAULT_VALUE);
 }
 
 /**
@@ -364,12 +375,7 @@
 	}
 
 	cancel_delayed_work_sync(&req->work);
-
-	trace_pm_qos_update_request(req->pm_qos_class, new_value);
-	if (new_value != req->node.prio)
-		pm_qos_update_target(
-			pm_qos_array[req->pm_qos_class]->constraints,
-			&req->node, PM_QOS_UPDATE_REQ, new_value);
+	__pm_qos_update_request(req, new_value);
 }
 EXPORT_SYMBOL_GPL(pm_qos_update_request);
 
diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile
new file mode 100644
index 0000000..85405bd
--- /dev/null
+++ b/kernel/printk/Makefile
@@ -0,0 +1,2 @@
+obj-y	= printk.o
+obj-$(CONFIG_A11Y_BRAILLE_CONSOLE)	+= braille.o
diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c
new file mode 100644
index 0000000..276762f
--- /dev/null
+++ b/kernel/printk/braille.c
@@ -0,0 +1,49 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/console.h>
+#include <linux/string.h>
+
+#include "console_cmdline.h"
+#include "braille.h"
+
+char *_braille_console_setup(char **str, char **brl_options)
+{
+	if (!memcmp(*str, "brl,", 4)) {
+		*brl_options = "";
+		*str += 4;
+	} else if (!memcmp(str, "brl=", 4)) {
+		*brl_options = *str + 4;
+		*str = strchr(*brl_options, ',');
+		if (!*str)
+			pr_err("need port name after brl=\n");
+		else
+			*((*str)++) = 0;
+	} else
+		return NULL;
+
+	return *str;
+}
+
+int
+_braille_register_console(struct console *console, struct console_cmdline *c)
+{
+	int rtn = 0;
+
+	if (c->brl_options) {
+		console->flags |= CON_BRL;
+		rtn = braille_register_console(console, c->index, c->options,
+					       c->brl_options);
+	}
+
+	return rtn;
+}
+
+int
+_braille_unregister_console(struct console *console)
+{
+	if (console->flags & CON_BRL)
+		return braille_unregister_console(console);
+
+	return 0;
+}
diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h
new file mode 100644
index 0000000..769d771
--- /dev/null
+++ b/kernel/printk/braille.h
@@ -0,0 +1,48 @@
+#ifndef _PRINTK_BRAILLE_H
+#define _PRINTK_BRAILLE_H
+
+#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
+
+static inline void
+braille_set_options(struct console_cmdline *c, char *brl_options)
+{
+	c->brl_options = brl_options;
+}
+
+char *
+_braille_console_setup(char **str, char **brl_options);
+
+int
+_braille_register_console(struct console *console, struct console_cmdline *c);
+
+int
+_braille_unregister_console(struct console *console);
+
+#else
+
+static inline void
+braille_set_options(struct console_cmdline *c, char *brl_options)
+{
+}
+
+static inline char *
+_braille_console_setup(char **str, char **brl_options)
+{
+	return NULL;
+}
+
+static inline int
+_braille_register_console(struct console *console, struct console_cmdline *c)
+{
+	return 0;
+}
+
+static inline int
+_braille_unregister_console(struct console *console)
+{
+	return 0;
+}
+
+#endif
+
+#endif
diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h
new file mode 100644
index 0000000..cbd69d8
--- /dev/null
+++ b/kernel/printk/console_cmdline.h
@@ -0,0 +1,14 @@
+#ifndef _CONSOLE_CMDLINE_H
+#define _CONSOLE_CMDLINE_H
+
+struct console_cmdline
+{
+	char	name[8];			/* Name of the driver	    */
+	int	index;				/* Minor dev. to use	    */
+	char	*options;			/* Options for the driver   */
+#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
+	char	*brl_options;			/* Options for braille driver */
+#endif
+};
+
+#endif
diff --git a/kernel/printk.c b/kernel/printk/printk.c
similarity index 94%
rename from kernel/printk.c
rename to kernel/printk/printk.c
index 69b0890..5b5a708 100644
--- a/kernel/printk.c
+++ b/kernel/printk/printk.c
@@ -51,6 +51,9 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/printk.h>
 
+#include "console_cmdline.h"
+#include "braille.h"
+
 /* printk's without a loglevel use this.. */
 #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
 
@@ -105,19 +108,11 @@
 /*
  *	Array of consoles built from command line options (console=)
  */
-struct console_cmdline
-{
-	char	name[8];			/* Name of the driver	    */
-	int	index;				/* Minor dev. to use	    */
-	char	*options;			/* Options for the driver   */
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-	char	*brl_options;			/* Options for braille driver */
-#endif
-};
 
 #define MAX_CMDLINECONSOLES 8
 
 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];
+
 static int selected_console = -1;
 static int preferred_console = -1;
 int console_set_on_cmdline;
@@ -178,7 +173,7 @@
  *         67                           "g"
  *   0032     00 00 00                  padding to next message header
  *
- * The 'struct log' buffer header must never be directly exported to
+ * The 'struct printk_log' buffer header must never be directly exported to
  * userspace, it is a kernel-private implementation detail that might
  * need to be changed in the future, when the requirements change.
  *
@@ -200,7 +195,7 @@
 	LOG_CONT	= 8,	/* text is a fragment of a continuation line */
 };
 
-struct log {
+struct printk_log {
 	u64 ts_nsec;		/* timestamp in nanoseconds */
 	u16 len;		/* length of entire record */
 	u16 text_len;		/* length of text buffer */
@@ -248,7 +243,7 @@
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
 #define LOG_ALIGN 4
 #else
-#define LOG_ALIGN __alignof__(struct log)
+#define LOG_ALIGN __alignof__(struct printk_log)
 #endif
 #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
 static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
@@ -259,35 +254,35 @@
 static volatile unsigned int logbuf_cpu = UINT_MAX;
 
 /* human readable text of the record */
-static char *log_text(const struct log *msg)
+static char *log_text(const struct printk_log *msg)
 {
-	return (char *)msg + sizeof(struct log);
+	return (char *)msg + sizeof(struct printk_log);
 }
 
 /* optional key/value pair dictionary attached to the record */
-static char *log_dict(const struct log *msg)
+static char *log_dict(const struct printk_log *msg)
 {
-	return (char *)msg + sizeof(struct log) + msg->text_len;
+	return (char *)msg + sizeof(struct printk_log) + msg->text_len;
 }
 
 /* get record by index; idx must point to valid msg */
-static struct log *log_from_idx(u32 idx)
+static struct printk_log *log_from_idx(u32 idx)
 {
-	struct log *msg = (struct log *)(log_buf + idx);
+	struct printk_log *msg = (struct printk_log *)(log_buf + idx);
 
 	/*
 	 * A length == 0 record is the end of buffer marker. Wrap around and
 	 * read the message at the start of the buffer.
 	 */
 	if (!msg->len)
-		return (struct log *)log_buf;
+		return (struct printk_log *)log_buf;
 	return msg;
 }
 
 /* get next record; idx must point to valid msg */
 static u32 log_next(u32 idx)
 {
-	struct log *msg = (struct log *)(log_buf + idx);
+	struct printk_log *msg = (struct printk_log *)(log_buf + idx);
 
 	/* length == 0 indicates the end of the buffer; wrap */
 	/*
@@ -296,7 +291,7 @@
 	 * return the one after that.
 	 */
 	if (!msg->len) {
-		msg = (struct log *)log_buf;
+		msg = (struct printk_log *)log_buf;
 		return msg->len;
 	}
 	return idx + msg->len;
@@ -308,11 +303,11 @@
 		      const char *dict, u16 dict_len,
 		      const char *text, u16 text_len)
 {
-	struct log *msg;
+	struct printk_log *msg;
 	u32 size, pad_len;
 
 	/* number of '\0' padding bytes to next message */
-	size = sizeof(struct log) + text_len + dict_len;
+	size = sizeof(struct printk_log) + text_len + dict_len;
 	pad_len = (-size) & (LOG_ALIGN - 1);
 	size += pad_len;
 
@@ -324,7 +319,7 @@
 		else
 			free = log_first_idx - log_next_idx;
 
-		if (free > size + sizeof(struct log))
+		if (free > size + sizeof(struct printk_log))
 			break;
 
 		/* drop old messages until we have enough contiuous space */
@@ -332,18 +327,18 @@
 		log_first_seq++;
 	}
 
-	if (log_next_idx + size + sizeof(struct log) >= log_buf_len) {
+	if (log_next_idx + size + sizeof(struct printk_log) >= log_buf_len) {
 		/*
 		 * This message + an additional empty header does not fit
 		 * at the end of the buffer. Add an empty header with len == 0
 		 * to signify a wrap around.
 		 */
-		memset(log_buf + log_next_idx, 0, sizeof(struct log));
+		memset(log_buf + log_next_idx, 0, sizeof(struct printk_log));
 		log_next_idx = 0;
 	}
 
 	/* fill message */
-	msg = (struct log *)(log_buf + log_next_idx);
+	msg = (struct printk_log *)(log_buf + log_next_idx);
 	memcpy(log_text(msg), text, text_len);
 	msg->text_len = text_len;
 	memcpy(log_dict(msg), dict, dict_len);
@@ -356,7 +351,7 @@
 	else
 		msg->ts_nsec = local_clock();
 	memset(log_dict(msg) + dict_len, 0, pad_len);
-	msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
+	msg->len = sizeof(struct printk_log) + text_len + dict_len + pad_len;
 
 	/* insert message */
 	log_next_idx += msg->len;
@@ -479,7 +474,7 @@
 			    size_t count, loff_t *ppos)
 {
 	struct devkmsg_user *user = file->private_data;
-	struct log *msg;
+	struct printk_log *msg;
 	u64 ts_usec;
 	size_t i;
 	char cont = '-';
@@ -724,14 +719,14 @@
 	VMCOREINFO_SYMBOL(log_first_idx);
 	VMCOREINFO_SYMBOL(log_next_idx);
 	/*
-	 * Export struct log size and field offsets. User space tools can
+	 * Export struct printk_log size and field offsets. User space tools can
 	 * parse it and detect any changes to structure down the line.
 	 */
-	VMCOREINFO_STRUCT_SIZE(log);
-	VMCOREINFO_OFFSET(log, ts_nsec);
-	VMCOREINFO_OFFSET(log, len);
-	VMCOREINFO_OFFSET(log, text_len);
-	VMCOREINFO_OFFSET(log, dict_len);
+	VMCOREINFO_STRUCT_SIZE(printk_log);
+	VMCOREINFO_OFFSET(printk_log, ts_nsec);
+	VMCOREINFO_OFFSET(printk_log, len);
+	VMCOREINFO_OFFSET(printk_log, text_len);
+	VMCOREINFO_OFFSET(printk_log, dict_len);
 }
 #endif
 
@@ -884,7 +879,7 @@
 		       (unsigned long)ts, rem_nsec / 1000);
 }
 
-static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
+static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf)
 {
 	size_t len = 0;
 	unsigned int prefix = (msg->facility << 3) | msg->level;
@@ -907,7 +902,7 @@
 	return len;
 }
 
-static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
 			     bool syslog, char *buf, size_t size)
 {
 	const char *text = log_text(msg);
@@ -969,7 +964,7 @@
 static int syslog_print(char __user *buf, int size)
 {
 	char *text;
-	struct log *msg;
+	struct printk_log *msg;
 	int len = 0;
 
 	text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
@@ -1060,7 +1055,7 @@
 		idx = clear_idx;
 		prev = 0;
 		while (seq < log_next_seq) {
-			struct log *msg = log_from_idx(idx);
+			struct printk_log *msg = log_from_idx(idx);
 
 			len += msg_print_text(msg, prev, true, NULL, 0);
 			prev = msg->flags;
@@ -1073,7 +1068,7 @@
 		idx = clear_idx;
 		prev = 0;
 		while (len > size && seq < log_next_seq) {
-			struct log *msg = log_from_idx(idx);
+			struct printk_log *msg = log_from_idx(idx);
 
 			len -= msg_print_text(msg, prev, true, NULL, 0);
 			prev = msg->flags;
@@ -1087,7 +1082,7 @@
 		len = 0;
 		prev = 0;
 		while (len >= 0 && seq < next_seq) {
-			struct log *msg = log_from_idx(idx);
+			struct printk_log *msg = log_from_idx(idx);
 			int textlen;
 
 			textlen = msg_print_text(msg, prev, true, text,
@@ -1233,7 +1228,7 @@
 
 			error = 0;
 			while (seq < log_next_seq) {
-				struct log *msg = log_from_idx(idx);
+				struct printk_log *msg = log_from_idx(idx);
 
 				error += msg_print_text(msg, prev, true, NULL, 0);
 				idx = log_next(idx);
@@ -1719,10 +1714,10 @@
 	u8 level;
 	bool flushed:1;
 } cont;
-static struct log *log_from_idx(u32 idx) { return NULL; }
+static struct printk_log *log_from_idx(u32 idx) { return NULL; }
 static u32 log_next(u32 idx) { return 0; }
 static void call_console_drivers(int level, const char *text, size_t len) {}
-static size_t msg_print_text(const struct log *msg, enum log_flags prev,
+static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev,
 			     bool syslog, char *buf, size_t size) { return 0; }
 static size_t cont_print_text(char *text, size_t size) { return 0; }
 
@@ -1761,23 +1756,23 @@
 	 *	See if this tty is not yet registered, and
 	 *	if we have a slot free.
 	 */
-	for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
-		if (strcmp(console_cmdline[i].name, name) == 0 &&
-			  console_cmdline[i].index == idx) {
-				if (!brl_options)
-					selected_console = i;
-				return 0;
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
+		if (strcmp(c->name, name) == 0 && c->index == idx) {
+			if (!brl_options)
+				selected_console = i;
+			return 0;
 		}
+	}
 	if (i == MAX_CMDLINECONSOLES)
 		return -E2BIG;
 	if (!brl_options)
 		selected_console = i;
-	c = &console_cmdline[i];
 	strlcpy(c->name, name, sizeof(c->name));
 	c->options = options;
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-	c->brl_options = brl_options;
-#endif
+	braille_set_options(c, brl_options);
+
 	c->index = idx;
 	return 0;
 }
@@ -1790,20 +1785,8 @@
 	char *s, *options, *brl_options = NULL;
 	int idx;
 
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-	if (!memcmp(str, "brl,", 4)) {
-		brl_options = "";
-		str += 4;
-	} else if (!memcmp(str, "brl=", 4)) {
-		brl_options = str + 4;
-		str = strchr(brl_options, ',');
-		if (!str) {
-			printk(KERN_ERR "need port name after brl=\n");
-			return 1;
-		}
-		*(str++) = 0;
-	}
-#endif
+	if (_braille_console_setup(&str, &brl_options))
+		return 1;
 
 	/*
 	 * Decode str into name, index, options.
@@ -1858,15 +1841,15 @@
 	struct console_cmdline *c;
 	int i;
 
-	for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++)
-		if (strcmp(console_cmdline[i].name, name) == 0 &&
-			  console_cmdline[i].index == idx) {
-				c = &console_cmdline[i];
-				strlcpy(c->name, name_new, sizeof(c->name));
-				c->name[sizeof(c->name) - 1] = 0;
-				c->options = options;
-				c->index = idx_new;
-				return i;
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++)
+		if (strcmp(c->name, name) == 0 && c->index == idx) {
+			strlcpy(c->name, name_new, sizeof(c->name));
+			c->name[sizeof(c->name) - 1] = 0;
+			c->options = options;
+			c->index = idx_new;
+			return i;
 		}
 	/* not found */
 	return -1;
@@ -2046,7 +2029,7 @@
 	console_cont_flush(text, sizeof(text));
 again:
 	for (;;) {
-		struct log *msg;
+		struct printk_log *msg;
 		size_t len;
 		int level;
 
@@ -2241,6 +2224,7 @@
 	int i;
 	unsigned long flags;
 	struct console *bcon = NULL;
+	struct console_cmdline *c;
 
 	/*
 	 * before we register a new CON_BOOT console, make sure we don't
@@ -2288,30 +2272,25 @@
 	 *	See if this console matches one we selected on
 	 *	the command line.
 	 */
-	for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0];
-			i++) {
-		if (strcmp(console_cmdline[i].name, newcon->name) != 0)
+	for (i = 0, c = console_cmdline;
+	     i < MAX_CMDLINECONSOLES && c->name[0];
+	     i++, c++) {
+		if (strcmp(c->name, newcon->name) != 0)
 			continue;
 		if (newcon->index >= 0 &&
-		    newcon->index != console_cmdline[i].index)
+		    newcon->index != c->index)
 			continue;
 		if (newcon->index < 0)
-			newcon->index = console_cmdline[i].index;
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-		if (console_cmdline[i].brl_options) {
-			newcon->flags |= CON_BRL;
-			braille_register_console(newcon,
-					console_cmdline[i].index,
-					console_cmdline[i].options,
-					console_cmdline[i].brl_options);
+			newcon->index = c->index;
+
+		if (_braille_register_console(newcon, c))
 			return;
-		}
-#endif
+
 		if (newcon->setup &&
 		    newcon->setup(newcon, console_cmdline[i].options) != 0)
 			break;
 		newcon->flags |= CON_ENABLED;
-		newcon->index = console_cmdline[i].index;
+		newcon->index = c->index;
 		if (i == selected_console) {
 			newcon->flags |= CON_CONSDEV;
 			preferred_console = selected_console;
@@ -2394,13 +2373,13 @@
 int unregister_console(struct console *console)
 {
         struct console *a, *b;
-	int res = 1;
+	int res;
 
-#ifdef CONFIG_A11Y_BRAILLE_CONSOLE
-	if (console->flags & CON_BRL)
-		return braille_unregister_console(console);
-#endif
+	res = _braille_unregister_console(console);
+	if (res)
+		return res;
 
+	res = 1;
 	console_lock();
 	if (console_drivers == console) {
 		console_drivers=console->next;
@@ -2666,7 +2645,7 @@
 bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog,
 			       char *line, size_t size, size_t *len)
 {
-	struct log *msg;
+	struct printk_log *msg;
 	size_t l = 0;
 	bool ret = false;
 
@@ -2778,7 +2757,7 @@
 	idx = dumper->cur_idx;
 	prev = 0;
 	while (seq < dumper->next_seq) {
-		struct log *msg = log_from_idx(idx);
+		struct printk_log *msg = log_from_idx(idx);
 
 		l += msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
@@ -2791,7 +2770,7 @@
 	idx = dumper->cur_idx;
 	prev = 0;
 	while (l > size && seq < dumper->next_seq) {
-		struct log *msg = log_from_idx(idx);
+		struct printk_log *msg = log_from_idx(idx);
 
 		l -= msg_print_text(msg, prev, true, NULL, 0);
 		idx = log_next(idx);
@@ -2806,7 +2785,7 @@
 	l = 0;
 	prev = 0;
 	while (seq < dumper->next_seq) {
-		struct log *msg = log_from_idx(idx);
+		struct printk_log *msg = log_from_idx(idx);
 
 		l += msg_print_text(msg, prev, syslog, buf + l, size - l);
 		idx = log_next(idx);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 4041f57..a146ee3 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -469,7 +469,6 @@
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
 	clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-	flush_ptrace_hw_breakpoint(child);
 
 	write_lock_irq(&tasklist_lock);
 	/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7c32cb..05c39f0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -933,6 +933,8 @@
 /**
  * task_curr - is this task currently executing on a CPU?
  * @p: the task in question.
+ *
+ * Return: 1 if the task is currently executing. 0 otherwise.
  */
 inline int task_curr(const struct task_struct *p)
 {
@@ -1482,7 +1484,7 @@
  * the simpler "current->state = TASK_RUNNING" to mark yourself
  * runnable without the overhead of this.
  *
- * Returns %true if @p was woken up, %false if it was already running
+ * Return: %true if @p was woken up, %false if it was already running.
  * or @state didn't match @p's state.
  */
 static int
@@ -1491,7 +1493,13 @@
 	unsigned long flags;
 	int cpu, success = 0;
 
-	smp_wmb();
+	/*
+	 * If we are going to wake up a thread waiting for CONDITION we
+	 * need to ensure that CONDITION=1 done by the caller can not be
+	 * reordered with p->state check below. This pairs with mb() in
+	 * set_current_state() the waiting thread does.
+	 */
+	smp_mb__before_spinlock();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	if (!(p->state & state))
 		goto out;
@@ -1577,8 +1585,9 @@
  * @p: The process to be woken up.
  *
  * Attempt to wake up the nominated process and move it to the set of runnable
- * processes.  Returns 1 if the process was woken up, 0 if it was already
- * running.
+ * processes.
+ *
+ * Return: 1 if the process was woken up, 0 if it was already running.
  *
  * It may be assumed that this function implies a write memory barrier before
  * changing the task state if and only if any tasks are woken up.
@@ -2191,6 +2200,8 @@
  * This makes sure that uptime, CFS vruntime, load
  * balancing, etc... continue to move forward, even
  * with a very low granularity.
+ *
+ * Return: Maximum deferment in nanoseconds.
  */
 u64 scheduler_tick_max_deferment(void)
 {
@@ -2394,6 +2405,12 @@
 	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
 
+	/*
+	 * Make sure that signal_pending_state()->signal_pending() below
+	 * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE)
+	 * done by the caller to avoid the race with signal_wake_up().
+	 */
+	smp_mb__before_spinlock();
 	raw_spin_lock_irq(&rq->lock);
 
 	switch_count = &prev->nivcsw;
@@ -2796,8 +2813,8 @@
  * specified timeout to expire. The timeout is in jiffies. It is not
  * interruptible.
  *
- * The return value is 0 if timed out, and positive (at least 1, or number of
- * jiffies left till timeout) if completed.
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
  */
 unsigned long __sched
 wait_for_completion_timeout(struct completion *x, unsigned long timeout)
@@ -2829,8 +2846,8 @@
  * specified timeout to expire. The timeout is in jiffies. It is not
  * interruptible. The caller is accounted as waiting for IO.
  *
- * The return value is 0 if timed out, and positive (at least 1, or number of
- * jiffies left till timeout) if completed.
+ * Return: 0 if timed out, and positive (at least 1, or number of jiffies left
+ * till timeout) if completed.
  */
 unsigned long __sched
 wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
@@ -2846,7 +2863,7 @@
  * This waits for completion of a specific task to be signaled. It is
  * interruptible.
  *
- * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
  */
 int __sched wait_for_completion_interruptible(struct completion *x)
 {
@@ -2865,8 +2882,8 @@
  * This waits for either a completion of a specific task to be signaled or for a
  * specified timeout to expire. It is interruptible. The timeout is in jiffies.
  *
- * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
- * positive (at least 1, or number of jiffies left till timeout) if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
  */
 long __sched
 wait_for_completion_interruptible_timeout(struct completion *x,
@@ -2883,7 +2900,7 @@
  * This waits to be signaled for completion of a specific task. It can be
  * interrupted by a kill signal.
  *
- * The return value is -ERESTARTSYS if interrupted, 0 if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if completed.
  */
 int __sched wait_for_completion_killable(struct completion *x)
 {
@@ -2903,8 +2920,8 @@
  * signaled or for a specified timeout to expire. It can be
  * interrupted by a kill signal. The timeout is in jiffies.
  *
- * The return value is -ERESTARTSYS if interrupted, 0 if timed out,
- * positive (at least 1, or number of jiffies left till timeout) if completed.
+ * Return: -ERESTARTSYS if interrupted, 0 if timed out, positive (at least 1,
+ * or number of jiffies left till timeout) if completed.
  */
 long __sched
 wait_for_completion_killable_timeout(struct completion *x,
@@ -2918,7 +2935,7 @@
  *	try_wait_for_completion - try to decrement a completion without blocking
  *	@x:	completion structure
  *
- *	Returns: 0 if a decrement cannot be done without blocking
+ *	Return: 0 if a decrement cannot be done without blocking
  *		 1 if a decrement succeeded.
  *
  *	If a completion is being used as a counting completion,
@@ -2945,7 +2962,7 @@
  *	completion_done - Test to see if a completion has any waiters
  *	@x:	completion structure
  *
- *	Returns: 0 if there are waiters (wait_for_completion() in progress)
+ *	Return: 0 if there are waiters (wait_for_completion() in progress)
  *		 1 if there are no waiters.
  *
  */
@@ -3182,7 +3199,7 @@
  * task_prio - return the priority value of a given task.
  * @p: the task in question.
  *
- * This is the priority value as seen by users in /proc.
+ * Return: The priority value as seen by users in /proc.
  * RT tasks are offset by -200. Normal tasks are centered
  * around 0, value goes from -16 to +15.
  */
@@ -3194,6 +3211,8 @@
 /**
  * task_nice - return the nice value of a given task.
  * @p: the task in question.
+ *
+ * Return: The nice value [ -20 ... 0 ... 19 ].
  */
 int task_nice(const struct task_struct *p)
 {
@@ -3204,6 +3223,8 @@
 /**
  * idle_cpu - is a given cpu idle currently?
  * @cpu: the processor in question.
+ *
+ * Return: 1 if the CPU is currently idle. 0 otherwise.
  */
 int idle_cpu(int cpu)
 {
@@ -3226,6 +3247,8 @@
 /**
  * idle_task - return the idle task for a given cpu.
  * @cpu: the processor in question.
+ *
+ * Return: The idle task for the cpu @cpu.
  */
 struct task_struct *idle_task(int cpu)
 {
@@ -3235,6 +3258,8 @@
 /**
  * find_process_by_pid - find a process with a matching PID value.
  * @pid: the pid in question.
+ *
+ * The task of @pid, if found. %NULL otherwise.
  */
 static struct task_struct *find_process_by_pid(pid_t pid)
 {
@@ -3432,6 +3457,8 @@
  * @policy: new policy.
  * @param: structure containing the new RT priority.
  *
+ * Return: 0 on success. An error code otherwise.
+ *
  * NOTE that the task may be already dead.
  */
 int sched_setscheduler(struct task_struct *p, int policy,
@@ -3451,6 +3478,8 @@
  * current context has permission.  For example, this is needed in
  * stop_machine(): we create temporary high priority worker threads,
  * but our caller might not have that capability.
+ *
+ * Return: 0 on success. An error code otherwise.
  */
 int sched_setscheduler_nocheck(struct task_struct *p, int policy,
 			       const struct sched_param *param)
@@ -3485,6 +3514,8 @@
  * @pid: the pid in question.
  * @policy: new policy.
  * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
  */
 SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy,
 		struct sched_param __user *, param)
@@ -3500,6 +3531,8 @@
  * sys_sched_setparam - set/change the RT priority of a thread
  * @pid: the pid in question.
  * @param: structure containing the new RT priority.
+ *
+ * Return: 0 on success. An error code otherwise.
  */
 SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)
 {
@@ -3509,6 +3542,9 @@
 /**
  * sys_sched_getscheduler - get the policy (scheduling class) of a thread
  * @pid: the pid in question.
+ *
+ * Return: On success, the policy of the thread. Otherwise, a negative error
+ * code.
  */
 SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 {
@@ -3535,6 +3571,9 @@
  * sys_sched_getparam - get the RT priority of a thread
  * @pid: the pid in question.
  * @param: structure containing the RT priority.
+ *
+ * Return: On success, 0 and the RT priority is in @param. Otherwise, an error
+ * code.
  */
 SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 {
@@ -3659,6 +3698,8 @@
  * @pid: pid of the process
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to the new cpu mask
+ *
+ * Return: 0 on success. An error code otherwise.
  */
 SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
 		unsigned long __user *, user_mask_ptr)
@@ -3710,6 +3751,8 @@
  * @pid: pid of the process
  * @len: length in bytes of the bitmask pointed to by user_mask_ptr
  * @user_mask_ptr: user-space pointer to hold the current cpu mask
+ *
+ * Return: 0 on success. An error code otherwise.
  */
 SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
 		unsigned long __user *, user_mask_ptr)
@@ -3744,6 +3787,8 @@
  *
  * This function yields the current CPU to other tasks. If there are no
  * other threads running on this CPU then this function will return.
+ *
+ * Return: 0.
  */
 SYSCALL_DEFINE0(sched_yield)
 {
@@ -3869,7 +3914,7 @@
  * It's the caller's job to ensure that the target task struct
  * can't go away on us before we can do any checks.
  *
- * Returns:
+ * Return:
  *	true (>0) if we indeed boosted the target task.
  *	false (0) if we failed to boost the target.
  *	-ESRCH if there's no task to yield to.
@@ -3972,8 +4017,9 @@
  * sys_sched_get_priority_max - return maximum RT priority.
  * @policy: scheduling class.
  *
- * this syscall returns the maximum rt_priority that can be used
- * by a given scheduling class.
+ * Return: On success, this syscall returns the maximum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
  */
 SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
 {
@@ -3997,8 +4043,9 @@
  * sys_sched_get_priority_min - return minimum RT priority.
  * @policy: scheduling class.
  *
- * this syscall returns the minimum rt_priority that can be used
- * by a given scheduling class.
+ * Return: On success, this syscall returns the minimum
+ * rt_priority that can be used by a given scheduling class.
+ * On failure, a negative error code is returned.
  */
 SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
 {
@@ -4024,6 +4071,9 @@
  *
  * this syscall writes the default timeslice value of a given process
  * into the user-space timespec buffer. A value of '0' means infinity.
+ *
+ * Return: On success, 0 and the timeslice is in @interval. Otherwise,
+ * an error code.
  */
 SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 		struct timespec __user *, interval)
@@ -6632,6 +6682,8 @@
  * @cpu: the processor in question.
  *
  * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED!
+ *
+ * Return: The current task for @cpu.
  */
 struct task_struct *curr_task(int cpu)
 {
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 1095e87..8b836b3 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -62,7 +62,7 @@
  * any discrepancies created by racing against the uncertainty of the current
  * priority configuration.
  *
- * Returns: (int)bool - CPUs were found
+ * Return: (int)bool - CPUs were found
  */
 int cpupri_find(struct cpupri *cp, struct task_struct *p,
 		struct cpumask *lowest_mask)
@@ -203,7 +203,7 @@
  * cpupri_init - initialize the cpupri structure
  * @cp: The cpupri context
  *
- * Returns: -ENOMEM if memory fails.
+ * Return: -ENOMEM on memory allocation failure.
  */
 int cpupri_init(struct cpupri *cp)
 {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bb456f4..68f1609 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -851,7 +851,7 @@
 {
 	struct task_struct *p = current;
 
-	if (!sched_feat_numa(NUMA))
+	if (!numabalancing_enabled)
 		return;
 
 	/* FIXME: Allocate task-specific structure for placement policy here */
@@ -2032,6 +2032,7 @@
 	 */
 	update_entity_load_avg(curr, 1);
 	update_cfs_rq_blocked_load(cfs_rq, 1);
+	update_cfs_shares(cfs_rq);
 
 #ifdef CONFIG_SCHED_HRTICK
 	/*
@@ -4280,6 +4281,8 @@
  * get_sd_load_idx - Obtain the load index for a given sched domain.
  * @sd: The sched_domain whose load_idx is to be obtained.
  * @idle: The Idle status of the CPU for whose sd load_icx is obtained.
+ *
+ * Return: The load index.
  */
 static inline int get_sd_load_idx(struct sched_domain *sd,
 					enum cpu_idle_type idle)
@@ -4574,6 +4577,9 @@
  *
  * Determine if @sg is a busier group than the previously selected
  * busiest group.
+ *
+ * Return: %true if @sg is a busier group than the previously selected
+ * busiest group. %false otherwise.
  */
 static bool update_sd_pick_busiest(struct lb_env *env,
 				   struct sd_lb_stats *sds,
@@ -4691,7 +4697,7 @@
  * assuming lower CPU number will be equivalent to lower a SMT thread
  * number.
  *
- * Returns 1 when packing is required and a task should be moved to
+ * Return: 1 when packing is required and a task should be moved to
  * this CPU.  The amount of the imbalance is returned in *imbalance.
  *
  * @env: The load balancing environment.
@@ -4869,7 +4875,7 @@
  * @balance: Pointer to a variable indicating if this_cpu
  *	is the appropriate cpu to perform load balancing at this_level.
  *
- * Returns:	- the busiest group if imbalance exists.
+ * Return:	- The busiest group if imbalance exists.
  *		- If no imbalance and user has opted for power-savings balance,
  *		   return the least loaded group whose CPUs can be
  *		   put to idle by rebalancing its tasks onto our group.
@@ -5786,7 +5792,7 @@
 		entity_tick(cfs_rq, se, queued);
 	}
 
-	if (sched_feat_numa(NUMA))
+	if (numabalancing_enabled)
 		task_tick_numa(rq, curr);
 
 	update_rq_runnable_avg(rq, 1);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ac09d98..07f6fc4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2346,7 +2346,11 @@
 					    int write, void *data)
 {
 	if (write) {
-		*valp = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
+		unsigned long jif = msecs_to_jiffies(*negp ? -*lvalp : *lvalp);
+
+		if (jif > INT_MAX)
+			return 1;
+		*valp = (int)jif;
 	} else {
 		int val = *valp;
 		unsigned long lval;
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a326f27..0b479a6 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -121,7 +121,7 @@
 	BUG_ON(bits > 32);
 	WARN_ON(!irqs_disabled());
 	read_sched_clock = read;
-	sched_clock_mask = (1 << bits) - 1;
+	sched_clock_mask = (1ULL << bits) - 1;
 	cd.rate = rate;
 
 	/* calculate the mult/shift to convert counter ticks to ns. */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e80183f..e8a1516 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -182,7 +182,8 @@
 		 * Don't allow the user to think they can get
 		 * full NO_HZ with this machine.
 		 */
-		WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
+		WARN_ONCE(have_nohz_full_mask,
+			  "NO_HZ FULL will not work with unstable sched clock");
 		return false;
 	}
 #endif
@@ -343,8 +344,6 @@
 
 void __init tick_nohz_init(void)
 {
-	int cpu;
-
 	if (!have_nohz_full_mask) {
 		if (tick_nohz_init_all() < 0)
 			return;
@@ -827,13 +826,10 @@
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
-	if (ts->inidle) {
-		/* Cancel the timer because CPU already waken up from the C-states*/
-		menu_hrtimer_cancel();
+	if (ts->inidle)
 		__tick_nohz_idle_enter(ts);
-	} else {
+	else
 		tick_nohz_full_stop_tick(ts);
-	}
 }
 
 /**
@@ -931,8 +927,6 @@
 
 	ts->inidle = 0;
 
-	/* Cancel the timer because CPU already waken up from the C-states*/
-	menu_hrtimer_cancel();
 	if (ts->idle_active || ts->tick_stopped)
 		now = ktime_get();
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 67708f4..a6d098c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1441,12 +1441,22 @@
  * the hashes are freed with call_rcu_sched().
  */
 static int
-ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 {
 	struct ftrace_hash *filter_hash;
 	struct ftrace_hash *notrace_hash;
 	int ret;
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	/*
+	 * There's a small race when adding ops that the ftrace handler
+	 * that wants regs, may be called without them. We can not
+	 * allow that handler to be called if regs is NULL.
+	 */
+	if (regs == NULL && (ops->flags & FTRACE_OPS_FL_SAVE_REGS))
+		return 0;
+#endif
+
 	filter_hash = rcu_dereference_raw_notrace(ops->filter_hash);
 	notrace_hash = rcu_dereference_raw_notrace(ops->notrace_hash);
 
@@ -2159,12 +2169,57 @@
 static unsigned long	ftrace_update_cnt;
 unsigned long		ftrace_update_tot_cnt;
 
-static int ops_traces_mod(struct ftrace_ops *ops)
+static inline int ops_traces_mod(struct ftrace_ops *ops)
 {
-	struct ftrace_hash *hash;
+	/*
+	 * Filter_hash being empty will default to trace module.
+	 * But notrace hash requires a test of individual module functions.
+	 */
+	return ftrace_hash_empty(ops->filter_hash) &&
+		ftrace_hash_empty(ops->notrace_hash);
+}
 
-	hash = ops->filter_hash;
-	return ftrace_hash_empty(hash);
+/*
+ * Check if the current ops references the record.
+ *
+ * If the ops traces all functions, then it was already accounted for.
+ * If the ops does not trace the current record function, skip it.
+ * If the ops ignores the function via notrace filter, skip it.
+ */
+static inline bool
+ops_references_rec(struct ftrace_ops *ops, struct dyn_ftrace *rec)
+{
+	/* If ops isn't enabled, ignore it */
+	if (!(ops->flags & FTRACE_OPS_FL_ENABLED))
+		return 0;
+
+	/* If ops traces all mods, we already accounted for it */
+	if (ops_traces_mod(ops))
+		return 0;
+
+	/* The function must be in the filter */
+	if (!ftrace_hash_empty(ops->filter_hash) &&
+	    !ftrace_lookup_ip(ops->filter_hash, rec->ip))
+		return 0;
+
+	/* If in notrace hash, we ignore it too */
+	if (ftrace_lookup_ip(ops->notrace_hash, rec->ip))
+		return 0;
+
+	return 1;
+}
+
+static int referenced_filters(struct dyn_ftrace *rec)
+{
+	struct ftrace_ops *ops;
+	int cnt = 0;
+
+	for (ops = ftrace_ops_list; ops != &ftrace_list_end; ops = ops->next) {
+		if (ops_references_rec(ops, rec))
+		    cnt++;
+	}
+
+	return cnt;
 }
 
 static int ftrace_update_code(struct module *mod)
@@ -2173,6 +2228,7 @@
 	struct dyn_ftrace *p;
 	cycle_t start, stop;
 	unsigned long ref = 0;
+	bool test = false;
 	int i;
 
 	/*
@@ -2186,9 +2242,12 @@
 
 		for (ops = ftrace_ops_list;
 		     ops != &ftrace_list_end; ops = ops->next) {
-			if (ops->flags & FTRACE_OPS_FL_ENABLED &&
-			    ops_traces_mod(ops))
-				ref++;
+			if (ops->flags & FTRACE_OPS_FL_ENABLED) {
+				if (ops_traces_mod(ops))
+					ref++;
+				else
+					test = true;
+			}
 		}
 	}
 
@@ -2198,12 +2257,16 @@
 	for (pg = ftrace_new_pgs; pg; pg = pg->next) {
 
 		for (i = 0; i < pg->index; i++) {
+			int cnt = ref;
+
 			/* If something went wrong, bail without enabling anything */
 			if (unlikely(ftrace_disabled))
 				return -1;
 
 			p = &pg->records[i];
-			p->flags = ref;
+			if (test)
+				cnt += referenced_filters(p);
+			p->flags = cnt;
 
 			/*
 			 * Do the initial record conversion from mcount jump
@@ -2223,7 +2286,7 @@
 			 * conversion puts the module to the correct state, thus
 			 * passing the ftrace_make_call check.
 			 */
-			if (ftrace_start_up && ref) {
+			if (ftrace_start_up && cnt) {
 				int failed = __ftrace_replace_code(p, 1);
 				if (failed)
 					ftrace_bug(failed, p->ip);
@@ -3374,6 +3437,12 @@
 	return add_hash_entry(hash, ip);
 }
 
+static void ftrace_ops_update_code(struct ftrace_ops *ops)
+{
+	if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled)
+		ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+}
+
 static int
 ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len,
 		unsigned long ip, int remove, int reset, int enable)
@@ -3416,9 +3485,8 @@
 
 	mutex_lock(&ftrace_lock);
 	ret = ftrace_hash_move(ops, enable, orig_hash, hash);
-	if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
-	    && ftrace_enabled)
-		ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+	if (!ret)
+		ftrace_ops_update_code(ops);
 
 	mutex_unlock(&ftrace_lock);
 
@@ -3645,9 +3713,8 @@
 		mutex_lock(&ftrace_lock);
 		ret = ftrace_hash_move(iter->ops, filter_hash,
 				       orig_hash, iter->hash);
-		if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
-		    && ftrace_enabled)
-			ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+		if (!ret)
+			ftrace_ops_update_code(iter->ops);
 
 		mutex_unlock(&ftrace_lock);
 	}
@@ -4218,7 +4285,7 @@
 # define ftrace_shutdown_sysctl()	do { } while (0)
 
 static inline int
-ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
+ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 {
 	return 1;
 }
@@ -4241,7 +4308,7 @@
 	do_for_each_ftrace_op(op, ftrace_control_list) {
 		if (!(op->flags & FTRACE_OPS_FL_STUB) &&
 		    !ftrace_function_local_disabled(op) &&
-		    ftrace_ops_test(op, ip))
+		    ftrace_ops_test(op, ip, regs))
 			op->func(ip, parent_ip, op, regs);
 	} while_for_each_ftrace_op(op);
 	trace_recursion_clear(TRACE_CONTROL_BIT);
@@ -4274,7 +4341,7 @@
 	 */
 	preempt_disable_notrace();
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		if (ftrace_ops_test(op, ip))
+		if (ftrace_ops_test(op, ip, regs))
 			op->func(ip, parent_ip, op, regs);
 	} while_for_each_ftrace_op(op);
 	preempt_enable_notrace();
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e444ff8..cc2f66f 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -36,11 +36,11 @@
 {
 	int ret;
 
-	ret = trace_seq_printf(s, "# compressed entry header\n");
-	ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
-	ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
-	ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
-	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_puts(s, "# compressed entry header\n");
+	ret = trace_seq_puts(s, "\ttype_len    :    5 bits\n");
+	ret = trace_seq_puts(s, "\ttime_delta  :   27 bits\n");
+	ret = trace_seq_puts(s, "\tarray       :   32 bits\n");
+	ret = trace_seq_putc(s, '\n');
 	ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
 			       RINGBUF_TYPE_PADDING);
 	ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
@@ -1066,7 +1066,7 @@
 }
 
 /**
- * check_pages - integrity check of buffer pages
+ * rb_check_pages - integrity check of buffer pages
  * @cpu_buffer: CPU buffer with pages to test
  *
  * As a safety measure we check to make sure the data pages have not
@@ -1258,7 +1258,7 @@
 #endif
 
 /**
- * ring_buffer_alloc - allocate a new ring_buffer
+ * __ring_buffer_alloc - allocate a new ring_buffer
  * @size: the size in bytes per cpu that is needed.
  * @flags: attributes to set for the ring buffer.
  *
@@ -1607,6 +1607,7 @@
  * ring_buffer_resize - resize the ring buffer
  * @buffer: the buffer to resize.
  * @size: the new size.
+ * @cpu_id: the cpu buffer to resize
  *
  * Minimum size is 2 * BUF_PAGE_SIZE.
  *
@@ -3956,11 +3957,11 @@
  * expected.
  *
  * After a sequence of ring_buffer_read_prepare calls, the user is
- * expected to make at least one call to ring_buffer_prepare_sync.
+ * expected to make at least one call to ring_buffer_read_prepare_sync.
  * Afterwards, ring_buffer_read_start is invoked to get things going
  * for real.
  *
- * This overall must be paired with ring_buffer_finish.
+ * This overall must be paired with ring_buffer_read_finish.
  */
 struct ring_buffer_iter *
 ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
@@ -4009,7 +4010,7 @@
  * an intervening ring_buffer_read_prepare_sync must have been
  * performed.
  *
- * Must be paired with ring_buffer_finish.
+ * Must be paired with ring_buffer_read_finish.
  */
 void
 ring_buffer_read_start(struct ring_buffer_iter *iter)
@@ -4031,7 +4032,7 @@
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
 /**
- * ring_buffer_finish - finish reading the iterator of the buffer
+ * ring_buffer_read_finish - finish reading the iterator of the buffer
  * @iter: The iterator retrieved by ring_buffer_start
  *
  * This re-enables the recording to the buffer, and frees the
@@ -4346,6 +4347,7 @@
 /**
  * ring_buffer_alloc_read_page - allocate a page to read from buffer
  * @buffer: the buffer to allocate for.
+ * @cpu: the cpu buffer to allocate.
  *
  * This function is used in conjunction with ring_buffer_read_page.
  * When reading a full page from the ring buffer, these functions
@@ -4403,7 +4405,7 @@
  * to swap with a page in the ring buffer.
  *
  * for example:
- *	rpage = ring_buffer_alloc_read_page(buffer);
+ *	rpage = ring_buffer_alloc_read_page(buffer, cpu);
  *	if (!rpage)
  *		return error;
  *	ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0cd500b..496f94d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -243,20 +243,25 @@
 }
 EXPORT_SYMBOL_GPL(filter_current_check_discard);
 
-cycle_t ftrace_now(int cpu)
+cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
 	u64 ts;
 
 	/* Early boot up does not have a buffer yet */
-	if (!global_trace.trace_buffer.buffer)
+	if (!buf->buffer)
 		return trace_clock_local();
 
-	ts = ring_buffer_time_stamp(global_trace.trace_buffer.buffer, cpu);
-	ring_buffer_normalize_time_stamp(global_trace.trace_buffer.buffer, cpu, &ts);
+	ts = ring_buffer_time_stamp(buf->buffer, cpu);
+	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
 
 	return ts;
 }
 
+cycle_t ftrace_now(int cpu)
+{
+	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
+}
+
 /**
  * tracing_is_enabled - Show if global_trace has been disabled
  *
@@ -1211,7 +1216,7 @@
 	/* Make sure all commits have finished */
 	synchronize_sched();
 
-	buf->time_start = ftrace_now(buf->cpu);
+	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
 
 	for_each_online_cpu(cpu)
 		ring_buffer_reset_cpu(buffer, cpu);
@@ -1219,23 +1224,17 @@
 	ring_buffer_record_enable(buffer);
 }
 
-void tracing_reset_current(int cpu)
-{
-	tracing_reset(&global_trace.trace_buffer, cpu);
-}
-
+/* Must have trace_types_lock held */
 void tracing_reset_all_online_cpus(void)
 {
 	struct trace_array *tr;
 
-	mutex_lock(&trace_types_lock);
 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
 		tracing_reset_online_cpus(&tr->trace_buffer);
 #ifdef CONFIG_TRACER_MAX_TRACE
 		tracing_reset_online_cpus(&tr->max_buffer);
 #endif
 	}
-	mutex_unlock(&trace_types_lock);
 }
 
 #define SAVED_CMDLINES 128
@@ -2843,6 +2842,17 @@
 	return 0;
 }
 
+/*
+ * Should be used after trace_array_get(), trace_types_lock
+ * ensures that i_cdev was already initialized.
+ */
+static inline int tracing_get_cpu(struct inode *inode)
+{
+	if (inode->i_cdev) /* See trace_create_cpu_file() */
+		return (long)inode->i_cdev - 1;
+	return RING_BUFFER_ALL_CPUS;
+}
+
 static const struct seq_operations tracer_seq_ops = {
 	.start		= s_start,
 	.next		= s_next,
@@ -2851,9 +2861,9 @@
 };
 
 static struct trace_iterator *
-__tracing_open(struct trace_array *tr, struct trace_cpu *tc,
-	       struct inode *inode, struct file *file, bool snapshot)
+__tracing_open(struct inode *inode, struct file *file, bool snapshot)
 {
+	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
 	int cpu;
 
@@ -2894,8 +2904,8 @@
 		iter->trace_buffer = &tr->trace_buffer;
 	iter->snapshot = snapshot;
 	iter->pos = -1;
+	iter->cpu_file = tracing_get_cpu(inode);
 	mutex_init(&iter->mutex);
-	iter->cpu_file = tc->cpu;
 
 	/* Notify the tracer early; before we stop tracing. */
 	if (iter->trace && iter->trace->open)
@@ -2971,45 +2981,22 @@
 	filp->private_data = inode->i_private;
 
 	return 0;
-	
-}
-
-static int tracing_open_generic_tc(struct inode *inode, struct file *filp)
-{
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
-
-	if (tracing_disabled)
-		return -ENODEV;
-
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
-
-	filp->private_data = inode->i_private;
-
-	return 0;
-	
 }
 
 static int tracing_release(struct inode *inode, struct file *file)
 {
+	struct trace_array *tr = inode->i_private;
 	struct seq_file *m = file->private_data;
 	struct trace_iterator *iter;
-	struct trace_array *tr;
 	int cpu;
 
-	/* Writes do not use seq_file, need to grab tr from inode */
 	if (!(file->f_mode & FMODE_READ)) {
-		struct trace_cpu *tc = inode->i_private;
-
-		trace_array_put(tc->tr);
+		trace_array_put(tr);
 		return 0;
 	}
 
+	/* Writes do not use seq_file */
 	iter = m->private;
-	tr = iter->tr;
-	trace_array_put(tr);
-
 	mutex_lock(&trace_types_lock);
 
 	for_each_tracing_cpu(cpu) {
@@ -3023,6 +3010,9 @@
 	if (!iter->snapshot)
 		/* reenable tracing if it was previously enabled */
 		tracing_start_tr(tr);
+
+	__trace_array_put(tr);
+
 	mutex_unlock(&trace_types_lock);
 
 	mutex_destroy(&iter->mutex);
@@ -3042,15 +3032,6 @@
 	return 0;
 }
 
-static int tracing_release_generic_tc(struct inode *inode, struct file *file)
-{
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
-
-	trace_array_put(tr);
-	return 0;
-}
-
 static int tracing_single_release_tr(struct inode *inode, struct file *file)
 {
 	struct trace_array *tr = inode->i_private;
@@ -3062,8 +3043,7 @@
 
 static int tracing_open(struct inode *inode, struct file *file)
 {
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
 	int ret = 0;
 
@@ -3071,16 +3051,17 @@
 		return -ENODEV;
 
 	/* If this file was open for write, then erase contents */
-	if ((file->f_mode & FMODE_WRITE) &&
-	    (file->f_flags & O_TRUNC)) {
-		if (tc->cpu == RING_BUFFER_ALL_CPUS)
+	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+		int cpu = tracing_get_cpu(inode);
+
+		if (cpu == RING_BUFFER_ALL_CPUS)
 			tracing_reset_online_cpus(&tr->trace_buffer);
 		else
-			tracing_reset(&tr->trace_buffer, tc->cpu);
+			tracing_reset(&tr->trace_buffer, cpu);
 	}
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(tr, tc, inode, file, false);
+		iter = __tracing_open(inode, file, false);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 		else if (trace_flags & TRACE_ITER_LATENCY_FMT)
@@ -3447,6 +3428,7 @@
 static int tracing_trace_options_open(struct inode *inode, struct file *file)
 {
 	struct trace_array *tr = inode->i_private;
+	int ret;
 
 	if (tracing_disabled)
 		return -ENODEV;
@@ -3454,7 +3436,11 @@
 	if (trace_array_get(tr) < 0)
 		return -ENODEV;
 
-	return single_open(file, tracing_trace_options_show, inode->i_private);
+	ret = single_open(file, tracing_trace_options_show, inode->i_private);
+	if (ret < 0)
+		trace_array_put(tr);
+
+	return ret;
 }
 
 static const struct file_operations tracing_iter_fops = {
@@ -3537,14 +3523,14 @@
 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static snapshot buffer\n"
 	"\t\t\t  Read the contents for more information\n"
 #endif
-#ifdef CONFIG_STACKTRACE
+#ifdef CONFIG_STACK_TRACER
 	"  stack_trace\t\t- Shows the max stack trace when active\n"
 	"  stack_max_size\t- Shows current max stack size that was traced\n"
 	"\t\t\t  Write into this file to reset the max size (trigger a new trace)\n"
 #ifdef CONFIG_DYNAMIC_FTRACE
 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace traces\n"
 #endif
-#endif /* CONFIG_STACKTRACE */
+#endif /* CONFIG_STACK_TRACER */
 ;
 
 static ssize_t
@@ -3941,8 +3927,7 @@
 
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
 	int ret = 0;
 
@@ -3958,6 +3943,7 @@
 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 	if (!iter) {
 		ret = -ENOMEM;
+		__trace_array_put(tr);
 		goto out;
 	}
 
@@ -3987,9 +3973,9 @@
 	if (trace_clocks[tr->clock_id].in_ns)
 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
 
-	iter->cpu_file = tc->cpu;
-	iter->tr = tc->tr;
-	iter->trace_buffer = &tc->tr->trace_buffer;
+	iter->tr = tr;
+	iter->trace_buffer = &tr->trace_buffer;
+	iter->cpu_file = tracing_get_cpu(inode);
 	mutex_init(&iter->mutex);
 	filp->private_data = iter;
 
@@ -4012,8 +3998,7 @@
 static int tracing_release_pipe(struct inode *inode, struct file *file)
 {
 	struct trace_iterator *iter = file->private_data;
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 
 	mutex_lock(&trace_types_lock);
 
@@ -4166,6 +4151,7 @@
 	memset(&iter->seq, 0,
 	       sizeof(struct trace_iterator) -
 	       offsetof(struct trace_iterator, seq));
+	cpumask_clear(iter->started);
 	iter->pos = -1;
 
 	trace_event_read_lock();
@@ -4366,15 +4352,16 @@
 tracing_entries_read(struct file *filp, char __user *ubuf,
 		     size_t cnt, loff_t *ppos)
 {
-	struct trace_cpu *tc = filp->private_data;
-	struct trace_array *tr = tc->tr;
+	struct inode *inode = file_inode(filp);
+	struct trace_array *tr = inode->i_private;
+	int cpu = tracing_get_cpu(inode);
 	char buf[64];
 	int r = 0;
 	ssize_t ret;
 
 	mutex_lock(&trace_types_lock);
 
-	if (tc->cpu == RING_BUFFER_ALL_CPUS) {
+	if (cpu == RING_BUFFER_ALL_CPUS) {
 		int cpu, buf_size_same;
 		unsigned long size;
 
@@ -4401,7 +4388,7 @@
 		} else
 			r = sprintf(buf, "X\n");
 	} else
-		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, tc->cpu)->entries >> 10);
+		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
 
 	mutex_unlock(&trace_types_lock);
 
@@ -4413,7 +4400,8 @@
 tracing_entries_write(struct file *filp, const char __user *ubuf,
 		      size_t cnt, loff_t *ppos)
 {
-	struct trace_cpu *tc = filp->private_data;
+	struct inode *inode = file_inode(filp);
+	struct trace_array *tr = inode->i_private;
 	unsigned long val;
 	int ret;
 
@@ -4427,8 +4415,7 @@
 
 	/* value is in KB */
 	val <<= 10;
-
-	ret = tracing_resize_ring_buffer(tc->tr, val, tc->cpu);
+	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
 	if (ret < 0)
 		return ret;
 
@@ -4482,7 +4469,7 @@
 
 	/* disable tracing ? */
 	if (trace_flags & TRACE_ITER_STOP_ON_FREE)
-		tracing_off();
+		tracer_tracing_off(tr);
 	/* resize the ring buffer to 0 */
 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
 
@@ -4647,12 +4634,12 @@
 	 * New clock may not be consistent with the previous clock.
 	 * Reset the buffer so that it doesn't have incomparable timestamps.
 	 */
-	tracing_reset_online_cpus(&global_trace.trace_buffer);
+	tracing_reset_online_cpus(&tr->trace_buffer);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
-	tracing_reset_online_cpus(&global_trace.max_buffer);
+	tracing_reset_online_cpus(&tr->max_buffer);
 #endif
 
 	mutex_unlock(&trace_types_lock);
@@ -4689,8 +4676,7 @@
 #ifdef CONFIG_TRACER_SNAPSHOT
 static int tracing_snapshot_open(struct inode *inode, struct file *file)
 {
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 	struct trace_iterator *iter;
 	struct seq_file *m;
 	int ret = 0;
@@ -4699,26 +4685,29 @@
 		return -ENODEV;
 
 	if (file->f_mode & FMODE_READ) {
-		iter = __tracing_open(tr, tc, inode, file, true);
+		iter = __tracing_open(inode, file, true);
 		if (IS_ERR(iter))
 			ret = PTR_ERR(iter);
 	} else {
 		/* Writes still need the seq_file to hold the private data */
+		ret = -ENOMEM;
 		m = kzalloc(sizeof(*m), GFP_KERNEL);
 		if (!m)
-			return -ENOMEM;
+			goto out;
 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
 		if (!iter) {
 			kfree(m);
-			return -ENOMEM;
+			goto out;
 		}
+		ret = 0;
+
 		iter->tr = tr;
-		iter->trace_buffer = &tc->tr->max_buffer;
-		iter->cpu_file = tc->cpu;
+		iter->trace_buffer = &tr->max_buffer;
+		iter->cpu_file = tracing_get_cpu(inode);
 		m->private = iter;
 		file->private_data = m;
 	}
-
+out:
 	if (ret < 0)
 		trace_array_put(tr);
 
@@ -4873,11 +4862,11 @@
 };
 
 static const struct file_operations tracing_entries_fops = {
-	.open		= tracing_open_generic_tc,
+	.open		= tracing_open_generic_tr,
 	.read		= tracing_entries_read,
 	.write		= tracing_entries_write,
 	.llseek		= generic_file_llseek,
-	.release	= tracing_release_generic_tc,
+	.release	= tracing_release_generic_tr,
 };
 
 static const struct file_operations tracing_total_entries_fops = {
@@ -4929,8 +4918,7 @@
 
 static int tracing_buffers_open(struct inode *inode, struct file *filp)
 {
-	struct trace_cpu *tc = inode->i_private;
-	struct trace_array *tr = tc->tr;
+	struct trace_array *tr = inode->i_private;
 	struct ftrace_buffer_info *info;
 	int ret;
 
@@ -4948,10 +4936,8 @@
 
 	mutex_lock(&trace_types_lock);
 
-	tr->ref++;
-
 	info->iter.tr		= tr;
-	info->iter.cpu_file	= tc->cpu;
+	info->iter.cpu_file	= tracing_get_cpu(inode);
 	info->iter.trace	= tr->current_trace;
 	info->iter.trace_buffer = &tr->trace_buffer;
 	info->spare		= NULL;
@@ -5268,14 +5254,14 @@
 tracing_stats_read(struct file *filp, char __user *ubuf,
 		   size_t count, loff_t *ppos)
 {
-	struct trace_cpu *tc = filp->private_data;
-	struct trace_array *tr = tc->tr;
+	struct inode *inode = file_inode(filp);
+	struct trace_array *tr = inode->i_private;
 	struct trace_buffer *trace_buf = &tr->trace_buffer;
+	int cpu = tracing_get_cpu(inode);
 	struct trace_seq *s;
 	unsigned long cnt;
 	unsigned long long t;
 	unsigned long usec_rem;
-	int cpu = tc->cpu;
 
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s)
@@ -5328,9 +5314,10 @@
 }
 
 static const struct file_operations tracing_stats_fops = {
-	.open		= tracing_open_generic,
+	.open		= tracing_open_generic_tr,
 	.read		= tracing_stats_read,
 	.llseek		= generic_file_llseek,
+	.release	= tracing_release_generic_tr,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -5519,10 +5506,20 @@
 	return tr->percpu_dir;
 }
 
+static struct dentry *
+trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
+		      void *data, long cpu, const struct file_operations *fops)
+{
+	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
+
+	if (ret) /* See tracing_get_cpu() */
+		ret->d_inode->i_cdev = (void *)(cpu + 1);
+	return ret;
+}
+
 static void
 tracing_init_debugfs_percpu(struct trace_array *tr, long cpu)
 {
-	struct trace_array_cpu *data = per_cpu_ptr(tr->trace_buffer.data, cpu);
 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
 	struct dentry *d_cpu;
 	char cpu_dir[30]; /* 30 characters should be more than enough */
@@ -5538,28 +5535,28 @@
 	}
 
 	/* per cpu trace_pipe */
-	trace_create_file("trace_pipe", 0444, d_cpu,
-			(void *)&data->trace_cpu, &tracing_pipe_fops);
+	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
+				tr, cpu, &tracing_pipe_fops);
 
 	/* per cpu trace */
-	trace_create_file("trace", 0644, d_cpu,
-			(void *)&data->trace_cpu, &tracing_fops);
+	trace_create_cpu_file("trace", 0644, d_cpu,
+				tr, cpu, &tracing_fops);
 
-	trace_create_file("trace_pipe_raw", 0444, d_cpu,
-			(void *)&data->trace_cpu, &tracing_buffers_fops);
+	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
+				tr, cpu, &tracing_buffers_fops);
 
-	trace_create_file("stats", 0444, d_cpu,
-			(void *)&data->trace_cpu, &tracing_stats_fops);
+	trace_create_cpu_file("stats", 0444, d_cpu,
+				tr, cpu, &tracing_stats_fops);
 
-	trace_create_file("buffer_size_kb", 0444, d_cpu,
-			(void *)&data->trace_cpu, &tracing_entries_fops);
+	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
+				tr, cpu, &tracing_entries_fops);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
-	trace_create_file("snapshot", 0644, d_cpu,
-			  (void *)&data->trace_cpu, &snapshot_fops);
+	trace_create_cpu_file("snapshot", 0644, d_cpu,
+				tr, cpu, &snapshot_fops);
 
-	trace_create_file("snapshot_raw", 0444, d_cpu,
-			(void *)&data->trace_cpu, &snapshot_raw_fops);
+	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
+				tr, cpu, &snapshot_raw_fops);
 #endif
 }
 
@@ -5868,17 +5865,6 @@
 static void
 init_tracer_debugfs(struct trace_array *tr, struct dentry *d_tracer);
 
-static void init_trace_buffers(struct trace_array *tr, struct trace_buffer *buf)
-{
-	int cpu;
-
-	for_each_tracing_cpu(cpu) {
-		memset(per_cpu_ptr(buf->data, cpu), 0, sizeof(struct trace_array_cpu));
-		per_cpu_ptr(buf->data, cpu)->trace_cpu.cpu = cpu;
-		per_cpu_ptr(buf->data, cpu)->trace_cpu.tr = tr;
-	}
-}
-
 static int
 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
 {
@@ -5896,8 +5882,6 @@
 		return -ENOMEM;
 	}
 
-	init_trace_buffers(tr, buf);
-
 	/* Allocate the first page for all buffers */
 	set_buffer_entries(&tr->trace_buffer,
 			   ring_buffer_size(tr->trace_buffer.buffer, 0));
@@ -5964,17 +5948,15 @@
 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
 		goto out_free_tr;
 
-	/* Holder for file callbacks */
-	tr->trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
-	tr->trace_cpu.tr = tr;
-
 	tr->dir = debugfs_create_dir(name, trace_instance_dir);
 	if (!tr->dir)
 		goto out_free_tr;
 
 	ret = event_trace_add_tracer(tr->dir, tr);
-	if (ret)
+	if (ret) {
+		debugfs_remove_recursive(tr->dir);
 		goto out_free_tr;
+	}
 
 	init_tracer_debugfs(tr, tr->dir);
 
@@ -6120,13 +6102,13 @@
 			  tr, &tracing_iter_fops);
 
 	trace_create_file("trace", 0644, d_tracer,
-			(void *)&tr->trace_cpu, &tracing_fops);
+			  tr, &tracing_fops);
 
 	trace_create_file("trace_pipe", 0444, d_tracer,
-			(void *)&tr->trace_cpu, &tracing_pipe_fops);
+			  tr, &tracing_pipe_fops);
 
 	trace_create_file("buffer_size_kb", 0644, d_tracer,
-			(void *)&tr->trace_cpu, &tracing_entries_fops);
+			  tr, &tracing_entries_fops);
 
 	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
 			  tr, &tracing_total_entries_fops);
@@ -6141,11 +6123,11 @@
 			  &trace_clock_fops);
 
 	trace_create_file("tracing_on", 0644, d_tracer,
-			    tr, &rb_simple_fops);
+			  tr, &rb_simple_fops);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
 	trace_create_file("snapshot", 0644, d_tracer,
-			  (void *)&tr->trace_cpu, &snapshot_fops);
+			  tr, &snapshot_fops);
 #endif
 
 	for_each_tracing_cpu(cpu)
@@ -6439,10 +6421,6 @@
 
 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
 
-	/* Holder for file callbacks */
-	global_trace.trace_cpu.cpu = RING_BUFFER_ALL_CPUS;
-	global_trace.trace_cpu.tr = &global_trace;
-
 	INIT_LIST_HEAD(&global_trace.systems);
 	INIT_LIST_HEAD(&global_trace.events);
 	list_add(&global_trace.list, &ftrace_trace_arrays);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4a4f6e1..afaae41 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -130,19 +130,12 @@
 
 struct trace_array;
 
-struct trace_cpu {
-	struct trace_array	*tr;
-	struct dentry		*dir;
-	int			cpu;
-};
-
 /*
  * The CPU trace array - it consists of thousands of trace entries
  * plus some other descriptor data: (for example which task started
  * the trace, etc.)
  */
 struct trace_array_cpu {
-	struct trace_cpu	trace_cpu;
 	atomic_t		disabled;
 	void			*buffer_page;	/* ring buffer spare */
 
@@ -196,7 +189,6 @@
 	bool			allocated_snapshot;
 #endif
 	int			buffer_disabled;
-	struct trace_cpu	trace_cpu;	/* place holder */
 #ifdef CONFIG_FTRACE_SYSCALLS
 	int			sys_refcount_enter;
 	int			sys_refcount_exit;
@@ -214,7 +206,6 @@
 	struct dentry		*event_dir;
 	struct list_head	systems;
 	struct list_head	events;
-	struct task_struct	*waiter;
 	int			ref;
 };
 
@@ -680,6 +671,15 @@
 					       struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
+/*
+ * Tracer data references selftest functions that only occur
+ * on boot up. These can be __init functions. Thus, when selftests
+ * are enabled, then the tracers need to reference __init functions.
+ */
+#define __tracer_data		__refdata
+#else
+/* Tracers are seldom changed. Optimize when selftests are disabled. */
+#define __tracer_data		__read_mostly
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 84b1e04..80c36bc 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -236,6 +236,10 @@
 
 	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
 
+	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
+			"perf buffer not large enough"))
+		return NULL;
+
 	pc = preempt_count();
 
 	*rctxp = perf_swevent_get_recursion_context();
@@ -266,6 +270,10 @@
 	struct pt_regs regs;
 	int rctx;
 
+	head = this_cpu_ptr(event_function.perf_events);
+	if (hlist_empty(head))
+		return;
+
 #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
 		    sizeof(u64)) - sizeof(u32))
 
@@ -279,8 +287,6 @@
 
 	entry->ip = ip;
 	entry->parent_ip = parent_ip;
-
-	head = this_cpu_ptr(event_function.perf_events);
 	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
 			      1, &regs, head, NULL);
 
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 7d85429..29a7ebc 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -409,33 +409,42 @@
 	mutex_unlock(&event_mutex);
 }
 
-/*
- * Open and update trace_array ref count.
- * Must have the current trace_array passed to it.
- */
-static int tracing_open_generic_file(struct inode *inode, struct file *filp)
+static void remove_subsystem(struct ftrace_subsystem_dir *dir)
 {
-	struct ftrace_event_file *file = inode->i_private;
-	struct trace_array *tr = file->tr;
-	int ret;
+	if (!dir)
+		return;
 
-	if (trace_array_get(tr) < 0)
-		return -ENODEV;
-
-	ret = tracing_open_generic(inode, filp);
-	if (ret < 0)
-		trace_array_put(tr);
-	return ret;
+	if (!--dir->nr_events) {
+		debugfs_remove_recursive(dir->entry);
+		list_del(&dir->list);
+		__put_system_dir(dir);
+	}
 }
 
-static int tracing_release_generic_file(struct inode *inode, struct file *filp)
+static void *event_file_data(struct file *filp)
 {
-	struct ftrace_event_file *file = inode->i_private;
-	struct trace_array *tr = file->tr;
+	return ACCESS_ONCE(file_inode(filp)->i_private);
+}
 
-	trace_array_put(tr);
+static void remove_event_file_dir(struct ftrace_event_file *file)
+{
+	struct dentry *dir = file->dir;
+	struct dentry *child;
 
-	return 0;
+	if (dir) {
+		spin_lock(&dir->d_lock);	/* probably unneeded */
+		list_for_each_entry(child, &dir->d_subdirs, d_u.d_child) {
+			if (child->d_inode)	/* probably unneeded */
+				child->d_inode->i_private = NULL;
+		}
+		spin_unlock(&dir->d_lock);
+
+		debugfs_remove_recursive(dir);
+	}
+
+	list_del(&file->list);
+	remove_subsystem(file->system);
+	kmem_cache_free(file_cachep, file);
 }
 
 /*
@@ -679,15 +688,25 @@
 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
 		  loff_t *ppos)
 {
-	struct ftrace_event_file *file = filp->private_data;
+	struct ftrace_event_file *file;
+	unsigned long flags;
 	char buf[4] = "0";
 
-	if (file->flags & FTRACE_EVENT_FL_ENABLED &&
-	    !(file->flags & FTRACE_EVENT_FL_SOFT_DISABLED))
+	mutex_lock(&event_mutex);
+	file = event_file_data(filp);
+	if (likely(file))
+		flags = file->flags;
+	mutex_unlock(&event_mutex);
+
+	if (!file)
+		return -ENODEV;
+
+	if (flags & FTRACE_EVENT_FL_ENABLED &&
+	    !(flags & FTRACE_EVENT_FL_SOFT_DISABLED))
 		strcpy(buf, "1");
 
-	if (file->flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
-	    file->flags & FTRACE_EVENT_FL_SOFT_MODE)
+	if (flags & FTRACE_EVENT_FL_SOFT_DISABLED ||
+	    flags & FTRACE_EVENT_FL_SOFT_MODE)
 		strcat(buf, "*");
 
 	strcat(buf, "\n");
@@ -699,13 +718,10 @@
 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		   loff_t *ppos)
 {
-	struct ftrace_event_file *file = filp->private_data;
+	struct ftrace_event_file *file;
 	unsigned long val;
 	int ret;
 
-	if (!file)
-		return -EINVAL;
-
 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
 	if (ret)
 		return ret;
@@ -717,8 +733,11 @@
 	switch (val) {
 	case 0:
 	case 1:
+		ret = -ENODEV;
 		mutex_lock(&event_mutex);
-		ret = ftrace_event_enable_disable(file, val);
+		file = event_file_data(filp);
+		if (likely(file))
+			ret = ftrace_event_enable_disable(file, val);
 		mutex_unlock(&event_mutex);
 		break;
 
@@ -825,65 +844,39 @@
 
 static void *f_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	struct ftrace_event_call *call = m->private;
-	struct ftrace_event_field *field;
+	struct ftrace_event_call *call = event_file_data(m->private);
 	struct list_head *common_head = &ftrace_common_fields;
 	struct list_head *head = trace_get_fields(call);
+	struct list_head *node = v;
 
 	(*pos)++;
 
 	switch ((unsigned long)v) {
 	case FORMAT_HEADER:
-		if (unlikely(list_empty(common_head)))
-			return NULL;
-
-		field = list_entry(common_head->prev,
-				   struct ftrace_event_field, link);
-		return field;
+		node = common_head;
+		break;
 
 	case FORMAT_FIELD_SEPERATOR:
-		if (unlikely(list_empty(head)))
-			return NULL;
-
-		field = list_entry(head->prev, struct ftrace_event_field, link);
-		return field;
+		node = head;
+		break;
 
 	case FORMAT_PRINTFMT:
 		/* all done */
 		return NULL;
 	}
 
-	field = v;
-	if (field->link.prev == common_head)
+	node = node->prev;
+	if (node == common_head)
 		return (void *)FORMAT_FIELD_SEPERATOR;
-	else if (field->link.prev == head)
+	else if (node == head)
 		return (void *)FORMAT_PRINTFMT;
-
-	field = list_entry(field->link.prev, struct ftrace_event_field, link);
-
-	return field;
-}
-
-static void *f_start(struct seq_file *m, loff_t *pos)
-{
-	loff_t l = 0;
-	void *p;
-
-	/* Start by showing the header */
-	if (!*pos)
-		return (void *)FORMAT_HEADER;
-
-	p = (void *)FORMAT_HEADER;
-	do {
-		p = f_next(m, p, &l);
-	} while (p && l < *pos);
-
-	return p;
+	else
+		return node;
 }
 
 static int f_show(struct seq_file *m, void *v)
 {
-	struct ftrace_event_call *call = m->private;
+	struct ftrace_event_call *call = event_file_data(m->private);
 	struct ftrace_event_field *field;
 	const char *array_descriptor;
 
@@ -904,8 +897,7 @@
 		return 0;
 	}
 
-	field = v;
-
+	field = list_entry(v, struct ftrace_event_field, link);
 	/*
 	 * Smartly shows the array type(except dynamic array).
 	 * Normal:
@@ -932,8 +924,25 @@
 	return 0;
 }
 
+static void *f_start(struct seq_file *m, loff_t *pos)
+{
+	void *p = (void *)FORMAT_HEADER;
+	loff_t l = 0;
+
+	/* ->stop() is called even if ->start() fails */
+	mutex_lock(&event_mutex);
+	if (!event_file_data(m->private))
+		return ERR_PTR(-ENODEV);
+
+	while (l < *pos && p)
+		p = f_next(m, p, &l);
+
+	return p;
+}
+
 static void f_stop(struct seq_file *m, void *p)
 {
+	mutex_unlock(&event_mutex);
 }
 
 static const struct seq_operations trace_format_seq_ops = {
@@ -945,7 +954,6 @@
 
 static int trace_format_open(struct inode *inode, struct file *file)
 {
-	struct ftrace_event_call *call = inode->i_private;
 	struct seq_file *m;
 	int ret;
 
@@ -954,7 +962,7 @@
 		return ret;
 
 	m = file->private_data;
-	m->private = call;
+	m->private = file;
 
 	return 0;
 }
@@ -962,45 +970,47 @@
 static ssize_t
 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 {
-	struct ftrace_event_call *call = filp->private_data;
-	struct trace_seq *s;
-	int r;
+	int id = (long)event_file_data(filp);
+	char buf[32];
+	int len;
 
 	if (*ppos)
 		return 0;
 
-	s = kmalloc(sizeof(*s), GFP_KERNEL);
-	if (!s)
-		return -ENOMEM;
+	if (unlikely(!id))
+		return -ENODEV;
 
-	trace_seq_init(s);
-	trace_seq_printf(s, "%d\n", call->event.type);
+	len = sprintf(buf, "%d\n", id);
 
-	r = simple_read_from_buffer(ubuf, cnt, ppos,
-				    s->buffer, s->len);
-	kfree(s);
-	return r;
+	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
 }
 
 static ssize_t
 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 		  loff_t *ppos)
 {
-	struct ftrace_event_call *call = filp->private_data;
+	struct ftrace_event_call *call;
 	struct trace_seq *s;
-	int r;
+	int r = -ENODEV;
 
 	if (*ppos)
 		return 0;
 
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
+
 	if (!s)
 		return -ENOMEM;
 
 	trace_seq_init(s);
 
-	print_event_filter(call, s);
-	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+	mutex_lock(&event_mutex);
+	call = event_file_data(filp);
+	if (call)
+		print_event_filter(call, s);
+	mutex_unlock(&event_mutex);
+
+	if (call)
+		r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
 	kfree(s);
 
@@ -1011,9 +1021,9 @@
 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
 		   loff_t *ppos)
 {
-	struct ftrace_event_call *call = filp->private_data;
+	struct ftrace_event_call *call;
 	char *buf;
-	int err;
+	int err = -ENODEV;
 
 	if (cnt >= PAGE_SIZE)
 		return -EINVAL;
@@ -1028,7 +1038,12 @@
 	}
 	buf[cnt] = '\0';
 
-	err = apply_event_filter(call, buf);
+	mutex_lock(&event_mutex);
+	call = event_file_data(filp);
+	if (call)
+		err = apply_event_filter(call, buf);
+	mutex_unlock(&event_mutex);
+
 	free_page((unsigned long) buf);
 	if (err < 0)
 		return err;
@@ -1218,6 +1233,7 @@
 
 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_open(struct inode *inode, struct file *file);
+static int ftrace_event_release(struct inode *inode, struct file *file);
 
 static const struct seq_operations show_event_seq_ops = {
 	.start = t_start,
@@ -1245,14 +1261,13 @@
 	.read = seq_read,
 	.write = ftrace_event_write,
 	.llseek = seq_lseek,
-	.release = seq_release,
+	.release = ftrace_event_release,
 };
 
 static const struct file_operations ftrace_enable_fops = {
-	.open = tracing_open_generic_file,
+	.open = tracing_open_generic,
 	.read = event_enable_read,
 	.write = event_enable_write,
-	.release = tracing_release_generic_file,
 	.llseek = default_llseek,
 };
 
@@ -1264,7 +1279,6 @@
 };
 
 static const struct file_operations ftrace_event_id_fops = {
-	.open = tracing_open_generic,
 	.read = event_id_read,
 	.llseek = default_llseek,
 };
@@ -1323,6 +1337,15 @@
 	return ret;
 }
 
+static int ftrace_event_release(struct inode *inode, struct file *file)
+{
+	struct trace_array *tr = inode->i_private;
+
+	trace_array_put(tr);
+
+	return seq_release(inode, file);
+}
+
 static int
 ftrace_event_avail_open(struct inode *inode, struct file *file)
 {
@@ -1336,12 +1359,19 @@
 {
 	const struct seq_operations *seq_ops = &show_set_event_seq_ops;
 	struct trace_array *tr = inode->i_private;
+	int ret;
+
+	if (trace_array_get(tr) < 0)
+		return -ENODEV;
 
 	if ((file->f_mode & FMODE_WRITE) &&
 	    (file->f_flags & O_TRUNC))
 		ftrace_clear_events(tr);
 
-	return ftrace_event_open(inode, file, seq_ops);
+	ret = ftrace_event_open(inode, file, seq_ops);
+	if (ret < 0)
+		trace_array_put(tr);
+	return ret;
 }
 
 static struct event_subsystem *
@@ -1496,8 +1526,8 @@
 
 #ifdef CONFIG_PERF_EVENTS
 	if (call->event.type && call->class->reg)
-		trace_create_file("id", 0444, file->dir, call,
-		 		  id);
+		trace_create_file("id", 0444, file->dir,
+				  (void *)(long)call->event.type, id);
 #endif
 
 	/*
@@ -1522,33 +1552,16 @@
 	return 0;
 }
 
-static void remove_subsystem(struct ftrace_subsystem_dir *dir)
-{
-	if (!dir)
-		return;
-
-	if (!--dir->nr_events) {
-		debugfs_remove_recursive(dir->entry);
-		list_del(&dir->list);
-		__put_system_dir(dir);
-	}
-}
-
 static void remove_event_from_tracers(struct ftrace_event_call *call)
 {
 	struct ftrace_event_file *file;
 	struct trace_array *tr;
 
 	do_for_each_event_file_safe(tr, file) {
-
 		if (file->event_call != call)
 			continue;
 
-		list_del(&file->list);
-		debugfs_remove_recursive(file->dir);
-		remove_subsystem(file->system);
-		kmem_cache_free(file_cachep, file);
-
+		remove_event_file_dir(file);
 		/*
 		 * The do_for_each_event_file_safe() is
 		 * a double loop. After finding the call for this
@@ -1700,16 +1713,53 @@
 	destroy_preds(call);
 }
 
-/* Remove an event_call */
-void trace_remove_event_call(struct ftrace_event_call *call)
+static int probe_remove_event_call(struct ftrace_event_call *call)
 {
+	struct trace_array *tr;
+	struct ftrace_event_file *file;
+
+#ifdef CONFIG_PERF_EVENTS
+	if (call->perf_refcount)
+		return -EBUSY;
+#endif
+	do_for_each_event_file(tr, file) {
+		if (file->event_call != call)
+			continue;
+		/*
+		 * We can't rely on ftrace_event_enable_disable(enable => 0)
+		 * we are going to do, FTRACE_EVENT_FL_SOFT_MODE can suppress
+		 * TRACE_REG_UNREGISTER.
+		 */
+		if (file->flags & FTRACE_EVENT_FL_ENABLED)
+			return -EBUSY;
+		/*
+		 * The do_for_each_event_file_safe() is
+		 * a double loop. After finding the call for this
+		 * trace_array, we use break to jump to the next
+		 * trace_array.
+		 */
+		break;
+	} while_for_each_event_file();
+
+	__trace_remove_event_call(call);
+
+	return 0;
+}
+
+/* Remove an event_call */
+int trace_remove_event_call(struct ftrace_event_call *call)
+{
+	int ret;
+
 	mutex_lock(&trace_types_lock);
 	mutex_lock(&event_mutex);
 	down_write(&trace_event_sem);
-	__trace_remove_event_call(call);
+	ret = probe_remove_event_call(call);
 	up_write(&trace_event_sem);
 	mutex_unlock(&event_mutex);
 	mutex_unlock(&trace_types_lock);
+
+	return ret;
 }
 
 #define for_each_event(event, start, end)			\
@@ -2278,12 +2328,8 @@
 {
 	struct ftrace_event_file *file, *next;
 
-	list_for_each_entry_safe(file, next, &tr->events, list) {
-		list_del(&file->list);
-		debugfs_remove_recursive(file->dir);
-		remove_subsystem(file->system);
-		kmem_cache_free(file_cachep, file);
-	}
+	list_for_each_entry_safe(file, next, &tr->events, list)
+		remove_event_file_dir(file);
 }
 
 static void
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 0d883dc..97daa8c 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -637,17 +637,15 @@
 	free_page((unsigned long) buf);
 }
 
+/* caller must hold event_mutex */
 void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 {
-	struct event_filter *filter;
+	struct event_filter *filter = call->filter;
 
-	mutex_lock(&event_mutex);
-	filter = call->filter;
 	if (filter && filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
 	else
-		trace_seq_printf(s, "none\n");
-	mutex_unlock(&event_mutex);
+		trace_seq_puts(s, "none\n");
 }
 
 void print_subsystem_event_filter(struct event_subsystem *system,
@@ -660,7 +658,7 @@
 	if (filter && filter->filter_string)
 		trace_seq_printf(s, "%s\n", filter->filter_string);
 	else
-		trace_seq_printf(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
+		trace_seq_puts(s, DEFAULT_SYS_FILTER_MESSAGE "\n");
 	mutex_unlock(&event_mutex);
 }
 
@@ -1841,23 +1839,22 @@
 	return err;
 }
 
+/* caller must hold event_mutex */
 int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 {
 	struct event_filter *filter;
-	int err = 0;
-
-	mutex_lock(&event_mutex);
+	int err;
 
 	if (!strcmp(strstrip(filter_string), "0")) {
 		filter_disable(call);
 		filter = call->filter;
 		if (!filter)
-			goto out_unlock;
+			return 0;
 		RCU_INIT_POINTER(call->filter, NULL);
 		/* Make sure the filter is not being used */
 		synchronize_sched();
 		__free_filter(filter);
-		goto out_unlock;
+		return 0;
 	}
 
 	err = create_filter(call, filter_string, true, &filter);
@@ -1884,8 +1881,6 @@
 			__free_filter(tmp);
 		}
 	}
-out_unlock:
-	mutex_unlock(&event_mutex);
 
 	return err;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b863f93..38fe148 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -199,7 +199,7 @@
 	return 0;
 }
 
-static struct tracer function_trace __read_mostly =
+static struct tracer function_trace __tracer_data =
 {
 	.name		= "function",
 	.init		= function_trace_init,
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 8388bc9..b5c0924 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -446,7 +446,7 @@
 
 	/* First spaces to align center */
 	for (i = 0; i < spaces / 2; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -457,7 +457,7 @@
 
 	/* Last spaces to align center */
 	for (i = 0; i < spaces - (spaces / 2); i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -503,7 +503,7 @@
  ------------------------------------------
 
  */
-	ret = trace_seq_printf(s,
+	ret = trace_seq_puts(s,
 		" ------------------------------------------\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -516,7 +516,7 @@
 	if (ret == TRACE_TYPE_PARTIAL_LINE)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	ret = trace_seq_printf(s, " => ");
+	ret = trace_seq_puts(s, " => ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -524,7 +524,7 @@
 	if (ret == TRACE_TYPE_PARTIAL_LINE)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	ret = trace_seq_printf(s,
+	ret = trace_seq_puts(s,
 		"\n ------------------------------------------\n\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -645,7 +645,7 @@
 			ret = print_graph_proc(s, pid);
 			if (ret == TRACE_TYPE_PARTIAL_LINE)
 				return TRACE_TYPE_PARTIAL_LINE;
-			ret = trace_seq_printf(s, " | ");
+			ret = trace_seq_puts(s, " | ");
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
 		}
@@ -657,9 +657,9 @@
 		return ret;
 
 	if (type == TRACE_GRAPH_ENT)
-		ret = trace_seq_printf(s, "==========>");
+		ret = trace_seq_puts(s, "==========>");
 	else
-		ret = trace_seq_printf(s, "<==========");
+		ret = trace_seq_puts(s, "<==========");
 
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -668,7 +668,7 @@
 	if (ret != TRACE_TYPE_HANDLED)
 		return ret;
 
-	ret = trace_seq_printf(s, "\n");
+	ret = trace_seq_putc(s, '\n');
 
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
@@ -705,13 +705,13 @@
 		len += strlen(nsecs_str);
 	}
 
-	ret = trace_seq_printf(s, " us ");
+	ret = trace_seq_puts(s, " us ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	/* Print remaining spaces to fit the row's width */
 	for (i = len; i < 7; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -731,13 +731,13 @@
 	/* No real adata, just filling the column with spaces */
 	switch (duration) {
 	case DURATION_FILL_FULL:
-		ret = trace_seq_printf(s, "              |  ");
+		ret = trace_seq_puts(s, "              |  ");
 		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
 	case DURATION_FILL_START:
-		ret = trace_seq_printf(s, "  ");
+		ret = trace_seq_puts(s, "  ");
 		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
 	case DURATION_FILL_END:
-		ret = trace_seq_printf(s, " |");
+		ret = trace_seq_puts(s, " |");
 		return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
 	}
 
@@ -745,10 +745,10 @@
 	if (flags & TRACE_GRAPH_PRINT_OVERHEAD) {
 		/* Duration exceeded 100 msecs */
 		if (duration > 100000ULL)
-			ret = trace_seq_printf(s, "! ");
+			ret = trace_seq_puts(s, "! ");
 		/* Duration exceeded 10 msecs */
 		else if (duration > 10000ULL)
-			ret = trace_seq_printf(s, "+ ");
+			ret = trace_seq_puts(s, "+ ");
 	}
 
 	/*
@@ -757,7 +757,7 @@
 	 * to fill out the space.
 	 */
 	if (ret == -1)
-		ret = trace_seq_printf(s, "  ");
+		ret = trace_seq_puts(s, "  ");
 
 	/* Catching here any failure happenned above */
 	if (!ret)
@@ -767,7 +767,7 @@
 	if (ret != TRACE_TYPE_HANDLED)
 		return ret;
 
-	ret = trace_seq_printf(s, "|  ");
+	ret = trace_seq_puts(s, "|  ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -817,7 +817,7 @@
 
 	/* Function */
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -858,7 +858,7 @@
 
 	/* Function */
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -917,7 +917,7 @@
 		if (ret == TRACE_TYPE_PARTIAL_LINE)
 			return TRACE_TYPE_PARTIAL_LINE;
 
-		ret = trace_seq_printf(s, " | ");
+		ret = trace_seq_puts(s, " | ");
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -1117,7 +1117,7 @@
 
 	/* Closing brace */
 	for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
-		ret = trace_seq_printf(s, " ");
+		ret = trace_seq_putc(s, ' ');
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
@@ -1129,7 +1129,7 @@
 	 * belongs to, write out the function name.
 	 */
 	if (func_match) {
-		ret = trace_seq_printf(s, "}\n");
+		ret = trace_seq_puts(s, "}\n");
 		if (!ret)
 			return TRACE_TYPE_PARTIAL_LINE;
 	} else {
@@ -1179,13 +1179,13 @@
 	/* Indentation */
 	if (depth > 0)
 		for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) {
-			ret = trace_seq_printf(s, " ");
+			ret = trace_seq_putc(s, ' ');
 			if (!ret)
 				return TRACE_TYPE_PARTIAL_LINE;
 		}
 
 	/* The comment */
-	ret = trace_seq_printf(s, "/* ");
+	ret = trace_seq_puts(s, "/* ");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -1216,7 +1216,7 @@
 		s->len--;
 	}
 
-	ret = trace_seq_printf(s, " */\n");
+	ret = trace_seq_puts(s, " */\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -1448,7 +1448,7 @@
 	.funcs		= &graph_functions
 };
 
-static struct tracer graph_trace __read_mostly = {
+static struct tracer graph_trace __tracer_data = {
 	.name		= "function_graph",
 	.open		= graph_trace_open,
 	.pipe_open	= graph_trace_open,
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7ed6976..243f683 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -95,7 +95,7 @@
 }
 
 static int register_probe_event(struct trace_probe *tp);
-static void unregister_probe_event(struct trace_probe *tp);
+static int unregister_probe_event(struct trace_probe *tp);
 
 static DEFINE_MUTEX(probe_lock);
 static LIST_HEAD(probe_list);
@@ -243,11 +243,11 @@
 static int
 disable_trace_probe(struct trace_probe *tp, struct ftrace_event_file *file)
 {
+	struct event_file_link *link = NULL;
+	int wait = 0;
 	int ret = 0;
 
 	if (file) {
-		struct event_file_link *link;
-
 		link = find_event_file_link(tp, file);
 		if (!link) {
 			ret = -EINVAL;
@@ -255,10 +255,7 @@
 		}
 
 		list_del_rcu(&link->list);
-		/* synchronize with kprobe_trace_func/kretprobe_trace_func */
-		synchronize_sched();
-		kfree(link);
-
+		wait = 1;
 		if (!list_empty(&tp->files))
 			goto out;
 
@@ -271,8 +268,22 @@
 			disable_kretprobe(&tp->rp);
 		else
 			disable_kprobe(&tp->rp.kp);
+		wait = 1;
 	}
  out:
+	if (wait) {
+		/*
+		 * Synchronize with kprobe_trace_func/kretprobe_trace_func
+		 * to ensure disabled (all running handlers are finished).
+		 * This is not only for kfree(), but also the caller,
+		 * trace_remove_event_call() supposes it for releasing
+		 * event_call related objects, which will be accessed in
+		 * the kprobe_trace_func/kretprobe_trace_func.
+		 */
+		synchronize_sched();
+		kfree(link);	/* Ignored if link == NULL */
+	}
+
 	return ret;
 }
 
@@ -340,9 +351,12 @@
 	if (trace_probe_is_enabled(tp))
 		return -EBUSY;
 
+	/* Will fail if probe is being used by ftrace or perf */
+	if (unregister_probe_event(tp))
+		return -EBUSY;
+
 	__unregister_trace_probe(tp);
 	list_del(&tp->list);
-	unregister_probe_event(tp);
 
 	return 0;
 }
@@ -621,7 +635,9 @@
 	/* TODO: Use batch unregistration */
 	while (!list_empty(&probe_list)) {
 		tp = list_entry(probe_list.next, struct trace_probe, list);
-		unregister_trace_probe(tp);
+		ret = unregister_trace_probe(tp);
+		if (ret)
+			goto end;
 		free_trace_probe(tp);
 	}
 
@@ -1087,9 +1103,6 @@
 	__size = sizeof(*entry) + tp->size + dsize;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		     "profile buffer not large enough"))
-		return;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
@@ -1120,9 +1133,6 @@
 	__size = sizeof(*entry) + tp->size + dsize;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		     "profile buffer not large enough"))
-		return;
 
 	entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
 	if (!entry)
@@ -1242,11 +1252,15 @@
 	return ret;
 }
 
-static void unregister_probe_event(struct trace_probe *tp)
+static int unregister_probe_event(struct trace_probe *tp)
 {
+	int ret;
+
 	/* tp->event is unregistered in trace_remove_event_call() */
-	trace_remove_event_call(&tp->call);
-	kfree(tp->call.print_fmt);
+	ret = trace_remove_event_call(&tp->call);
+	if (!ret)
+		kfree(tp->call.print_fmt);
+	return ret;
 }
 
 /* Make a debugfs interface for controlling probe points */
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index a5e8f48..b3dcfb2 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -90,7 +90,7 @@
 	if (drv)
 		ret += trace_seq_printf(s, " %s\n", drv->name);
 	else
-		ret += trace_seq_printf(s, " \n");
+		ret += trace_seq_puts(s, " \n");
 	return ret;
 }
 
@@ -107,7 +107,7 @@
 	struct header_iter *hiter;
 	struct trace_seq *s = &iter->seq;
 
-	trace_seq_printf(s, "VERSION 20070824\n");
+	trace_seq_puts(s, "VERSION 20070824\n");
 
 	hiter = kzalloc(sizeof(*hiter), GFP_KERNEL);
 	if (!hiter)
@@ -209,7 +209,7 @@
 			(rw->value >> 0) & 0xff, rw->pc, 0);
 		break;
 	default:
-		ret = trace_seq_printf(s, "rw what?\n");
+		ret = trace_seq_puts(s, "rw what?\n");
 		break;
 	}
 	if (ret)
@@ -245,7 +245,7 @@
 			secs, usec_rem, m->map_id, 0UL, 0);
 		break;
 	default:
-		ret = trace_seq_printf(s, "map what?\n");
+		ret = trace_seq_puts(s, "map what?\n");
 		break;
 	}
 	if (ret)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index bb922d9..34e7cba 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -78,7 +78,7 @@
 
 	trace_assign_type(field, entry);
 
-	ret = trace_seq_printf(s, "%s", field->buf);
+	ret = trace_seq_puts(s, field->buf);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -558,14 +558,14 @@
 			if (ret)
 				ret = trace_seq_puts(s, "??");
 			if (ret)
-				ret = trace_seq_puts(s, "\n");
+				ret = trace_seq_putc(s, '\n');
 			continue;
 		}
 		if (!ret)
 			break;
 		if (ret)
 			ret = seq_print_user_ip(s, mm, ip, sym_flags);
-		ret = trace_seq_puts(s, "\n");
+		ret = trace_seq_putc(s, '\n');
 	}
 
 	if (mm)
@@ -579,7 +579,7 @@
 	int ret;
 
 	if (!ip)
-		return trace_seq_printf(s, "0");
+		return trace_seq_putc(s, '0');
 
 	if (sym_flags & TRACE_ITER_SYM_OFFSET)
 		ret = seq_print_sym_offset(s, "%s", ip);
@@ -964,14 +964,14 @@
 		goto partial;
 
 	if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) {
-		if (!trace_seq_printf(s, " <-"))
+		if (!trace_seq_puts(s, " <-"))
 			goto partial;
 		if (!seq_print_ip_sym(s,
 				      field->parent_ip,
 				      flags))
 			goto partial;
 	}
-	if (!trace_seq_printf(s, "\n"))
+	if (!trace_seq_putc(s, '\n'))
 		goto partial;
 
 	return TRACE_TYPE_HANDLED;
@@ -1210,7 +1210,7 @@
 
 		if (!seq_print_ip_sym(s, *p, flags))
 			goto partial;
-		if (!trace_seq_puts(s, "\n"))
+		if (!trace_seq_putc(s, '\n'))
 			goto partial;
 	}
 
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 322e164..8fd0365 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -175,7 +175,7 @@
 	entry = syscall_nr_to_meta(syscall);
 
 	if (!entry) {
-		trace_seq_printf(s, "\n");
+		trace_seq_putc(s, '\n');
 		return TRACE_TYPE_HANDLED;
 	}
 
@@ -566,15 +566,15 @@
 	if (!sys_data)
 		return;
 
+	head = this_cpu_ptr(sys_data->enter_event->perf_events);
+	if (hlist_empty(head))
+		return;
+
 	/* get the size after alignment with the u32 buffer size field */
 	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
 	size = ALIGN(size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		      "perf buffer not large enough"))
-		return;
-
 	rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
 				sys_data->enter_event->event.type, regs, &rctx);
 	if (!rec)
@@ -583,8 +583,6 @@
 	rec->nr = syscall_nr;
 	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
 			       (unsigned long *)&rec->args);
-
-	head = this_cpu_ptr(sys_data->enter_event->perf_events);
 	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
@@ -642,18 +640,14 @@
 	if (!sys_data)
 		return;
 
+	head = this_cpu_ptr(sys_data->exit_event->perf_events);
+	if (hlist_empty(head))
+		return;
+
 	/* We can probably do that at build time */
 	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 
-	/*
-	 * Impossible, but be paranoid with the future
-	 * How to put this check outside runtime?
-	 */
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
-		"exit event has grown above perf buffer size"))
-		return;
-
 	rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
 				sys_data->exit_event->event.type, regs, &rctx);
 	if (!rec)
@@ -661,8 +655,6 @@
 
 	rec->nr = syscall_nr;
 	rec->ret = syscall_get_return_value(current, regs);
-
-	head = this_cpu_ptr(sys_data->exit_event->perf_events);
 	perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head, NULL);
 }
 
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d5d0cd3..272261b 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -70,7 +70,7 @@
 	(sizeof(struct probe_arg) * (n)))
 
 static int register_uprobe_event(struct trace_uprobe *tu);
-static void unregister_uprobe_event(struct trace_uprobe *tu);
+static int unregister_uprobe_event(struct trace_uprobe *tu);
 
 static DEFINE_MUTEX(uprobe_lock);
 static LIST_HEAD(uprobe_list);
@@ -164,11 +164,17 @@
 }
 
 /* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */
-static void unregister_trace_uprobe(struct trace_uprobe *tu)
+static int unregister_trace_uprobe(struct trace_uprobe *tu)
 {
+	int ret;
+
+	ret = unregister_uprobe_event(tu);
+	if (ret)
+		return ret;
+
 	list_del(&tu->list);
-	unregister_uprobe_event(tu);
 	free_trace_uprobe(tu);
+	return 0;
 }
 
 /* Register a trace_uprobe and probe_event */
@@ -181,9 +187,12 @@
 
 	/* register as an event */
 	old_tp = find_probe_event(tu->call.name, tu->call.class->system);
-	if (old_tp)
+	if (old_tp) {
 		/* delete old event */
-		unregister_trace_uprobe(old_tp);
+		ret = unregister_trace_uprobe(old_tp);
+		if (ret)
+			goto end;
+	}
 
 	ret = register_uprobe_event(tu);
 	if (ret) {
@@ -256,6 +265,8 @@
 		group = UPROBE_EVENT_SYSTEM;
 
 	if (is_delete) {
+		int ret;
+
 		if (!event) {
 			pr_info("Delete command needs an event name.\n");
 			return -EINVAL;
@@ -269,9 +280,9 @@
 			return -ENOENT;
 		}
 		/* delete an event */
-		unregister_trace_uprobe(tu);
+		ret = unregister_trace_uprobe(tu);
 		mutex_unlock(&uprobe_lock);
-		return 0;
+		return ret;
 	}
 
 	if (argc < 2) {
@@ -408,16 +419,20 @@
 	return ret;
 }
 
-static void cleanup_all_probes(void)
+static int cleanup_all_probes(void)
 {
 	struct trace_uprobe *tu;
+	int ret = 0;
 
 	mutex_lock(&uprobe_lock);
 	while (!list_empty(&uprobe_list)) {
 		tu = list_entry(uprobe_list.next, struct trace_uprobe, list);
-		unregister_trace_uprobe(tu);
+		ret = unregister_trace_uprobe(tu);
+		if (ret)
+			break;
 	}
 	mutex_unlock(&uprobe_lock);
+	return ret;
 }
 
 /* Probes listing interfaces */
@@ -462,8 +477,13 @@
 
 static int probes_open(struct inode *inode, struct file *file)
 {
-	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
-		cleanup_all_probes();
+	int ret;
+
+	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
+		ret = cleanup_all_probes();
+		if (ret)
+			return ret;
+	}
 
 	return seq_open(file, &probes_seq_op);
 }
@@ -818,8 +838,6 @@
 
 	size = SIZEOF_TRACE_ENTRY(is_ret_probe(tu));
 	size = ALIGN(size + tu->size + sizeof(u32), sizeof(u64)) - sizeof(u32);
-	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return;
 
 	preempt_disable();
 	head = this_cpu_ptr(call->perf_events);
@@ -970,12 +988,17 @@
 	return ret;
 }
 
-static void unregister_uprobe_event(struct trace_uprobe *tu)
+static int unregister_uprobe_event(struct trace_uprobe *tu)
 {
+	int ret;
+
 	/* tu->event is unregistered in trace_remove_event_call() */
-	trace_remove_event_call(&tu->call);
+	ret = trace_remove_event_call(&tu->call);
+	if (ret)
+		return ret;
 	kfree(tu->call.print_fmt);
 	tu->call.print_fmt = NULL;
+	return 0;
 }
 
 /* Make a trace interface for controling probe points */
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index d8c30db..9064b91 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -62,6 +62,9 @@
 	kgid_t group = new->egid;
 	int ret;
 
+	if (parent_ns->level > 32)
+		return -EUSERS;
+
 	/*
 	 * Verify that we can not violate the policy of which files
 	 * may be accessed that is specified by the root directory,
@@ -92,6 +95,7 @@
 	atomic_set(&ns->count, 1);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
+	ns->level = parent_ns->level + 1;
 	ns->owner = owner;
 	ns->group = group;
 
@@ -105,16 +109,21 @@
 int unshare_userns(unsigned long unshare_flags, struct cred **new_cred)
 {
 	struct cred *cred;
+	int err = -ENOMEM;
 
 	if (!(unshare_flags & CLONE_NEWUSER))
 		return 0;
 
 	cred = prepare_creds();
-	if (!cred)
-		return -ENOMEM;
+	if (cred) {
+		err = create_user_ns(cred);
+		if (err)
+			put_cred(cred);
+		else
+			*new_cred = cred;
+	}
 
-	*new_cred = cred;
-	return create_user_ns(cred);
+	return err;
 }
 
 void free_user_ns(struct user_namespace *ns)
diff --git a/kernel/wait.c b/kernel/wait.c
index ce0daa3..d550920 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -333,7 +333,8 @@
 		prepare_to_wait(wq, &q->wait, mode);
 		val = q->key.flags;
 		if (atomic_read(val) == 0)
-			ret = (*action)(val);
+			break;
+		ret = (*action)(val);
 	} while (!ret && atomic_read(val) != 0);
 	finish_wait(wq, &q->wait);
 	return ret;
@@ -362,8 +363,7 @@
 
 /**
  * wake_up_atomic_t - Wake up a waiter on a atomic_t
- * @word: The word being waited on, a kernel virtual address
- * @bit: The bit of the word being waited on
+ * @p: The atomic_t being waited on, a kernel virtual address
  *
  * Wake up anyone waiting for the atomic_t to go to zero.
  *
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0b72e81..7f5d4be 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2817,6 +2817,19 @@
 	return false;
 }
 
+static bool __flush_work(struct work_struct *work)
+{
+	struct wq_barrier barr;
+
+	if (start_flush_work(work, &barr)) {
+		wait_for_completion(&barr.done);
+		destroy_work_on_stack(&barr.work);
+		return true;
+	} else {
+		return false;
+	}
+}
+
 /**
  * flush_work - wait for a work to finish executing the last queueing instance
  * @work: the work to flush
@@ -2830,18 +2843,10 @@
  */
 bool flush_work(struct work_struct *work)
 {
-	struct wq_barrier barr;
-
 	lock_map_acquire(&work->lockdep_map);
 	lock_map_release(&work->lockdep_map);
 
-	if (start_flush_work(work, &barr)) {
-		wait_for_completion(&barr.done);
-		destroy_work_on_stack(&barr.work);
-		return true;
-	} else {
-		return false;
-	}
+	return __flush_work(work);
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
@@ -3411,6 +3416,12 @@
 {
 	to->nice = from->nice;
 	cpumask_copy(to->cpumask, from->cpumask);
+	/*
+	 * Unlike hash and equality test, this function doesn't ignore
+	 * ->no_numa as it is used for both pool and wq attrs.  Instead,
+	 * get_unbound_pool() explicitly clears ->no_numa after copying.
+	 */
+	to->no_numa = from->no_numa;
 }
 
 /* hash value of the content of @attr */
@@ -3578,6 +3589,12 @@
 	lockdep_set_subclass(&pool->lock, 1);	/* see put_pwq() */
 	copy_workqueue_attrs(pool->attrs, attrs);
 
+	/*
+	 * no_numa isn't a worker_pool attribute, always clear it.  See
+	 * 'struct workqueue_attrs' comments for detail.
+	 */
+	pool->attrs->no_numa = false;
+
 	/* if cpumask is contained inside a NUMA node, we belong to that node */
 	if (wq_numa_enabled) {
 		for_each_node(node) {
@@ -4756,7 +4773,14 @@
 
 	INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
 	schedule_work_on(cpu, &wfc.work);
-	flush_work(&wfc.work);
+
+	/*
+	 * The work item is on-stack and can't lead to deadlock through
+	 * flushing.  Use __flush_work() to avoid spurious lockdep warnings
+	 * when work_on_cpu()s are nested.
+	 */
+	__flush_work(&wfc.work);
+
 	return wfc.ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu);
diff --git a/lib/Kconfig b/lib/Kconfig
index 35da513..71d9f81 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -66,8 +66,6 @@
 
 config CRC_T10DIF
 	tristate "CRC calculation for the T10 Data Integrity Field"
-	select CRYPTO
-	select CRYPTO_CRCT10DIF
 	help
 	  This option is only needed if a module that's not in the
 	  kernel tree needs to calculate CRC checks for use with the
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index fe3428c..fbbd66e 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -11,44 +11,57 @@
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/crc-t10dif.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <crypto/hash.h>
 
-static struct crypto_shash *crct10dif_tfm;
+/* Table generated using the following polynomium:
+ * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
+ * gt: 0x8bb7
+ */
+static const __u16 t10_dif_crc_table[256] = {
+	0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+	0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+	0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+	0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+	0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+	0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+	0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+	0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+	0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+	0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+	0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+	0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+	0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+	0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+	0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+	0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+	0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+	0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+	0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+	0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+	0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+	0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+	0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+	0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+	0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+	0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+	0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+	0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+	0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+	0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+	0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+	0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
 
 __u16 crc_t10dif(const unsigned char *buffer, size_t len)
 {
-	struct {
-		struct shash_desc shash;
-		char ctx[2];
-	} desc;
-	int err;
+	__u16 crc = 0;
+	unsigned int i;
 
-	desc.shash.tfm = crct10dif_tfm;
-	desc.shash.flags = 0;
-	*(__u16 *)desc.ctx = 0;
+	for (i = 0 ; i < len ; i++)
+		crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
 
-	err = crypto_shash_update(&desc.shash, buffer, len);
-	BUG_ON(err);
-
-	return *(__u16 *)desc.ctx;
+	return crc;
 }
 EXPORT_SYMBOL(crc_t10dif);
 
-static int __init crc_t10dif_mod_init(void)
-{
-	crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
-	return PTR_RET(crct10dif_tfm);
-}
-
-static void __exit crc_t10dif_mod_fini(void)
-{
-	crypto_free_shash(crct10dif_tfm);
-}
-
-module_init(crc_t10dif_mod_init);
-module_exit(crc_t10dif_mod_fini);
-
 MODULE_DESCRIPTION("T10 DIF CRC calculation");
 MODULE_LICENSE("GPL");
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index fd94058..28321d8 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -437,7 +437,7 @@
 exit:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(lz4_compress);
+EXPORT_SYMBOL(lz4_compress);
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index d3414ea..411be80 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -299,7 +299,7 @@
 	return ret;
 }
 #ifndef STATIC
-EXPORT_SYMBOL_GPL(lz4_decompress);
+EXPORT_SYMBOL(lz4_decompress);
 #endif
 
 int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
@@ -319,8 +319,8 @@
 	return ret;
 }
 #ifndef STATIC
-EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize);
+EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4 Decompressor");
 #endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index eb1a74f..f344f76 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -533,7 +533,7 @@
 exit:
 	return ret;
 }
-EXPORT_SYMBOL_GPL(lz4hc_compress);
+EXPORT_SYMBOL(lz4hc_compress);
 
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
 MODULE_DESCRIPTION("LZ4HC compressor");
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index d411355..aac5114 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -151,15 +151,12 @@
 #endif /* __a29k__ */
 
 #if defined(__alpha) && W_TYPE_SIZE == 64
-#define umul_ppmm(ph, pl, m0, m1) \
-do { \
-		UDItype __m0 = (m0), __m1 = (m1); \
-		__asm__ ("umulh %r1,%2,%0" \
-		: "=r" ((UDItype) ph) \
-		: "%rJ" (__m0), \
-			"rI" (__m1)); \
-		(pl) = __m0 * __m1; \
-	} while (0)
+#define umul_ppmm(ph, pl, m0, m1)			\
+do {							\
+	UDItype __m0 = (m0), __m1 = (m1);		\
+	(ph) = __builtin_alpha_umulh(__m0, __m1);	\
+	(pl) = __m0 * __m1;                             \
+} while (0)
 #define UMUL_TIME 46
 #ifndef LONGLONG_STANDALONE
 #define udiv_qrnnd(q, r, n1, n0, d) \
@@ -167,7 +164,7 @@
 	(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
 	(r) = __r; \
 } while (0)
-extern UDItype __udiv_qrnnd();
+extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
 #define UDIV_TIME 220
 #endif /* LONGLONG_STANDALONE */
 #endif /* __alpha */
diff --git a/mm/fremap.c b/mm/fremap.c
index 87da359..5bff081 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -57,17 +57,22 @@
 		unsigned long addr, unsigned long pgoff, pgprot_t prot)
 {
 	int err = -ENOMEM;
-	pte_t *pte;
+	pte_t *pte, ptfile;
 	spinlock_t *ptl;
 
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
 		goto out;
 
-	if (!pte_none(*pte))
-		zap_pte(mm, vma, addr, pte);
+	ptfile = pgoff_to_pte(pgoff);
 
-	set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
+	if (!pte_none(*pte)) {
+		if (pte_present(*pte) && pte_soft_dirty(*pte))
+			pte_file_mksoft_dirty(ptfile);
+		zap_pte(mm, vma, addr, pte);
+	}
+
+	set_pte_at(mm, addr, pte, ptfile);
 	/*
 	 * We don't need to run update_mmu_cache() here because the "file pte"
 	 * being installed by install_file_pte() is not a real pte - it's a
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 243e710..a92012a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1620,7 +1620,9 @@
 				     ((1L << PG_referenced) |
 				      (1L << PG_swapbacked) |
 				      (1L << PG_mlocked) |
-				      (1L << PG_uptodate)));
+				      (1L << PG_uptodate) |
+				      (1L << PG_active) |
+				      (1L << PG_unevictable)));
 		page_tail->flags |= (1L << PG_dirty);
 
 		/* clear PageTail before overwriting first_page */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 83aff0a..b60f330 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2490,7 +2490,7 @@
 
 	mm = vma->vm_mm;
 
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	__unmap_hugepage_range(&tlb, vma, start, end, ref_page);
 	tlb_finish_mmu(&tlb, start, end);
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 00a7a66..0878ff7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3195,11 +3195,11 @@
 	if (!s->memcg_params)
 		return -ENOMEM;
 
-	INIT_WORK(&s->memcg_params->destroy,
-			kmem_cache_destroy_work_func);
 	if (memcg) {
 		s->memcg_params->memcg = memcg;
 		s->memcg_params->root_cache = root_cache;
+		INIT_WORK(&s->memcg_params->destroy,
+				kmem_cache_destroy_work_func);
 	} else
 		s->memcg_params->is_root_cache = true;
 
@@ -6335,6 +6335,7 @@
 	mem_cgroup_invalidate_reclaim_iterators(memcg);
 	mem_cgroup_reparent_charges(memcg);
 	mem_cgroup_destroy_all_caches(memcg);
+	vmpressure_cleanup(&memcg->vmpressure);
 }
 
 static void mem_cgroup_css_free(struct cgroup *cont)
@@ -6968,7 +6969,6 @@
 #ifdef CONFIG_MEMCG_SWAP
 static int __init enable_swap_account(char *s)
 {
-	/* consider enabled if no parameter or 1 is given */
 	if (!strcmp(s, "1"))
 		really_do_swap_account = 1;
 	else if (!strcmp(s, "0"))
diff --git a/mm/memory.c b/mm/memory.c
index 1ce2e2a..af84bc0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -209,14 +209,15 @@
  *	tear-down from @mm. The @fullmm argument is used when @mm is without
  *	users and we're going to destroy the full address space (exit/execve).
  */
-void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm)
+void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end)
 {
 	tlb->mm = mm;
 
-	tlb->fullmm     = fullmm;
+	/* Is it from 0 to ~0? */
+	tlb->fullmm     = !(start | (end+1));
 	tlb->need_flush_all = 0;
-	tlb->start	= -1UL;
-	tlb->end	= 0;
+	tlb->start	= start;
+	tlb->end	= end;
 	tlb->need_flush = 0;
 	tlb->local.next = NULL;
 	tlb->local.nr   = 0;
@@ -256,8 +257,6 @@
 {
 	struct mmu_gather_batch *batch, *next;
 
-	tlb->start = start;
-	tlb->end   = end;
 	tlb_flush_mmu(tlb);
 
 	/* keep the page table cache within bounds */
@@ -1099,7 +1098,6 @@
 	spinlock_t *ptl;
 	pte_t *start_pte;
 	pte_t *pte;
-	unsigned long range_start = addr;
 
 again:
 	init_rss_vec(rss);
@@ -1141,9 +1139,12 @@
 				continue;
 			if (unlikely(details) && details->nonlinear_vma
 			    && linear_page_index(details->nonlinear_vma,
-						addr) != page->index)
-				set_pte_at(mm, addr, pte,
-					   pgoff_to_pte(page->index));
+						addr) != page->index) {
+				pte_t ptfile = pgoff_to_pte(page->index);
+				if (pte_soft_dirty(ptent))
+					pte_file_mksoft_dirty(ptfile);
+				set_pte_at(mm, addr, pte, ptfile);
+			}
 			if (PageAnon(page))
 				rss[MM_ANONPAGES]--;
 			else {
@@ -1202,17 +1203,25 @@
 	 * and page-free while holding it.
 	 */
 	if (force_flush) {
+		unsigned long old_end;
+
 		force_flush = 0;
 
-#ifdef HAVE_GENERIC_MMU_GATHER
-		tlb->start = range_start;
+		/*
+		 * Flush the TLB just for the previous segment,
+		 * then update the range to be the remaining
+		 * TLB range.
+		 */
+		old_end = tlb->end;
 		tlb->end = addr;
-#endif
+
 		tlb_flush_mmu(tlb);
-		if (addr != end) {
-			range_start = addr;
+
+		tlb->start = addr;
+		tlb->end = old_end;
+
+		if (addr != end)
 			goto again;
-		}
 	}
 
 	return addr;
@@ -1397,7 +1406,7 @@
 	unsigned long end = start + size;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	update_hiwater_rss(mm);
 	mmu_notifier_invalidate_range_start(mm, start, end);
 	for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
@@ -1423,7 +1432,7 @@
 	unsigned long end = address + size;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, address, end);
 	update_hiwater_rss(mm);
 	mmu_notifier_invalidate_range_start(mm, address, end);
 	unmap_single_vma(&tlb, vma, address, end, details);
@@ -3115,6 +3124,8 @@
 		exclusive = 1;
 	}
 	flush_icache_page(vma, page);
+	if (pte_swp_soft_dirty(orig_pte))
+		pte = pte_mksoft_dirty(pte);
 	set_pte_at(mm, address, page_table, pte);
 	if (page == swapcache)
 		do_page_add_anon_rmap(page, vma, address, exclusive);
@@ -3408,6 +3419,8 @@
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		else if (pte_file(orig_pte) && pte_file_soft_dirty(orig_pte))
+			pte_mksoft_dirty(entry);
 		if (anon) {
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 			page_add_new_anon_rmap(page, vma, address);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7431001..4baf12e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -732,7 +732,10 @@
 		if (prev) {
 			vma = prev;
 			next = vma->vm_next;
-			continue;
+			if (mpol_equal(vma_policy(vma), new_pol))
+				continue;
+			/* vma_merge() joined vma && vma->next, case 8 */
+			goto replace;
 		}
 		if (vma->vm_start != vmstart) {
 			err = split_vma(vma->vm_mm, vma, vmstart, 1);
@@ -744,6 +747,7 @@
 			if (err)
 				goto out;
 		}
+ replace:
 		err = vma_replace_policy(vma, new_pol);
 		if (err)
 			goto out;
diff --git a/mm/mmap.c b/mm/mmap.c
index fbad7b0..f9c97d1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -865,7 +865,7 @@
 		if (next->anon_vma)
 			anon_vma_merge(vma, next);
 		mm->map_count--;
-		vma_set_policy(vma, vma_policy(next));
+		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
@@ -2336,7 +2336,7 @@
 	struct mmu_gather tlb;
 
 	lru_add_drain();
-	tlb_gather_mmu(&tlb, mm, 0);
+	tlb_gather_mmu(&tlb, mm, start, end);
 	update_hiwater_rss(mm);
 	unmap_vmas(&tlb, vma, start, end);
 	free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
@@ -2709,7 +2709,7 @@
 
 	lru_add_drain();
 	flush_cache_mm(mm);
-	tlb_gather_mmu(&tlb, mm, 1);
+	tlb_gather_mmu(&tlb, mm, 0, -1);
 	/* update_hiwater_rss(mm) here? but nobody should be looking */
 	/* Use -1 here to ensure all VMAs in the mm are unmapped */
 	unmap_vmas(&tlb, vma, 0, -1);
diff --git a/mm/rmap.c b/mm/rmap.c
index cd356df..b2e29ac 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1236,6 +1236,7 @@
 			   swp_entry_to_pte(make_hwpoison_entry(page)));
 	} else if (PageAnon(page)) {
 		swp_entry_t entry = { .val = page_private(page) };
+		pte_t swp_pte;
 
 		if (PageSwapCache(page)) {
 			/*
@@ -1264,7 +1265,10 @@
 			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
 			entry = make_migration_entry(page, pte_write(pteval));
 		}
-		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
+		swp_pte = swp_entry_to_pte(entry);
+		if (pte_soft_dirty(pteval))
+			swp_pte = pte_swp_mksoft_dirty(swp_pte);
+		set_pte_at(mm, address, pte, swp_pte);
 		BUG_ON(pte_file(*pte));
 	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
 		   (TTU_ACTION(flags) == TTU_MIGRATION)) {
@@ -1401,8 +1405,12 @@
 		pteval = ptep_clear_flush(vma, address, pte);
 
 		/* If nonlinear, store the file page offset in the pte. */
-		if (page->index != linear_page_index(vma, address))
-			set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
+		if (page->index != linear_page_index(vma, address)) {
+			pte_t ptfile = pgoff_to_pte(page->index);
+			if (pte_soft_dirty(pteval))
+				pte_file_mksoft_dirty(ptfile);
+			set_pte_at(mm, address, pte, ptfile);
+		}
 
 		/* Move the dirty bit to the physical page now the pte is gone. */
 		if (pte_dirty(pteval))
diff --git a/mm/shmem.c b/mm/shmem.c
index a87990c..e43dc55 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1798,7 +1798,8 @@
 		}
 	}
 
-	offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
+	if (offset >= 0)
+		offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
 	mutex_unlock(&inode->i_mutex);
 	return offset;
 }
@@ -2908,14 +2909,8 @@
 
 /* common code */
 
-static char *shmem_dname(struct dentry *dentry, char *buffer, int buflen)
-{
-	return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
-				dentry->d_name.name);
-}
-
 static struct dentry_operations anon_ops = {
-	.d_dname = shmem_dname
+	.d_dname = simple_dname
 };
 
 /**
diff --git a/mm/slub.c b/mm/slub.c
index 2b02d66..e3ba1f2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1968,9 +1968,6 @@
 	int pages;
 	int pobjects;
 
-	if (!s->cpu_partial)
-		return;
-
 	do {
 		pages = 0;
 		pobjects = 0;
diff --git a/mm/swap.c b/mm/swap.c
index 4a1d0d2..62b78a6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -512,12 +512,7 @@
  */
 void lru_cache_add(struct page *page)
 {
-	if (PageActive(page)) {
-		VM_BUG_ON(PageUnevictable(page));
-	} else if (PageUnevictable(page)) {
-		VM_BUG_ON(PageActive(page));
-	}
-
+	VM_BUG_ON(PageActive(page) && PageUnevictable(page));
 	VM_BUG_ON(PageLRU(page));
 	__lru_cache_add(page);
 }
@@ -539,6 +534,7 @@
 
 	spin_lock_irq(&zone->lru_lock);
 	lruvec = mem_cgroup_page_lruvec(page, zone);
+	ClearPageActive(page);
 	SetPageUnevictable(page);
 	SetPageLRU(page);
 	add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
@@ -774,8 +770,6 @@
 void lru_add_page_tail(struct page *page, struct page *page_tail,
 		       struct lruvec *lruvec, struct list_head *list)
 {
-	int uninitialized_var(active);
-	enum lru_list lru;
 	const int file = 0;
 
 	VM_BUG_ON(!PageHead(page));
@@ -787,20 +781,6 @@
 	if (!list)
 		SetPageLRU(page_tail);
 
-	if (page_evictable(page_tail)) {
-		if (PageActive(page)) {
-			SetPageActive(page_tail);
-			active = 1;
-			lru = LRU_ACTIVE_ANON;
-		} else {
-			active = 0;
-			lru = LRU_INACTIVE_ANON;
-		}
-	} else {
-		SetPageUnevictable(page_tail);
-		lru = LRU_UNEVICTABLE;
-	}
-
 	if (likely(PageLRU(page)))
 		list_add_tail(&page_tail->lru, &page->lru);
 	else if (list) {
@@ -816,13 +796,13 @@
 		 * Use the standard add function to put page_tail on the list,
 		 * but then correct its position so they all end up in order.
 		 */
-		add_page_to_lru_list(page_tail, lruvec, lru);
+		add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail));
 		list_head = page_tail->lru.prev;
 		list_move_tail(&page_tail->lru, list_head);
 	}
 
 	if (!PageUnevictable(page))
-		update_page_reclaim_stat(lruvec, file, active);
+		update_page_reclaim_stat(lruvec, file, PageActive(page_tail));
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
@@ -833,7 +813,6 @@
 	int active = PageActive(page);
 	enum lru_list lru = page_lru(page);
 
-	VM_BUG_ON(PageUnevictable(page));
 	VM_BUG_ON(PageLRU(page));
 
 	SetPageLRU(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 36af6ee..6cf2e60 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -866,6 +866,21 @@
 }
 #endif /* CONFIG_HIBERNATION */
 
+static inline int maybe_same_pte(pte_t pte, pte_t swp_pte)
+{
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	/*
+	 * When pte keeps soft dirty bit the pte generated
+	 * from swap entry does not has it, still it's same
+	 * pte from logical point of view.
+	 */
+	pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte);
+	return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty);
+#else
+	return pte_same(pte, swp_pte);
+#endif
+}
+
 /*
  * No need to decide whether this PTE shares the swap entry with others,
  * just let do_wp_page work it out if a write is requested later - to
@@ -892,7 +907,7 @@
 	}
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
+	if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
 		mem_cgroup_cancel_charge_swapin(memcg);
 		ret = 0;
 		goto out;
@@ -947,7 +962,7 @@
 		 * swapoff spends a _lot_ of time in this loop!
 		 * Test inline before going to call unuse_pte.
 		 */
-		if (unlikely(pte_same(*pte, swp_pte))) {
+		if (unlikely(maybe_same_pte(*pte, swp_pte))) {
 			pte_unmap(pte);
 			ret = unuse_pte(vma, pmd, addr, entry, page);
 			if (ret)
diff --git a/mm/vmpressure.c b/mm/vmpressure.c
index 736a601..0c1e37d 100644
--- a/mm/vmpressure.c
+++ b/mm/vmpressure.c
@@ -180,12 +180,12 @@
 	if (!vmpr->scanned)
 		return;
 
-	mutex_lock(&vmpr->sr_lock);
+	spin_lock(&vmpr->sr_lock);
 	scanned = vmpr->scanned;
 	reclaimed = vmpr->reclaimed;
 	vmpr->scanned = 0;
 	vmpr->reclaimed = 0;
-	mutex_unlock(&vmpr->sr_lock);
+	spin_unlock(&vmpr->sr_lock);
 
 	do {
 		if (vmpressure_event(vmpr, scanned, reclaimed))
@@ -240,13 +240,13 @@
 	if (!scanned)
 		return;
 
-	mutex_lock(&vmpr->sr_lock);
+	spin_lock(&vmpr->sr_lock);
 	vmpr->scanned += scanned;
 	vmpr->reclaimed += reclaimed;
 	scanned = vmpr->scanned;
-	mutex_unlock(&vmpr->sr_lock);
+	spin_unlock(&vmpr->sr_lock);
 
-	if (scanned < vmpressure_win || work_pending(&vmpr->work))
+	if (scanned < vmpressure_win)
 		return;
 	schedule_work(&vmpr->work);
 }
@@ -367,8 +367,24 @@
  */
 void vmpressure_init(struct vmpressure *vmpr)
 {
-	mutex_init(&vmpr->sr_lock);
+	spin_lock_init(&vmpr->sr_lock);
 	mutex_init(&vmpr->events_lock);
 	INIT_LIST_HEAD(&vmpr->events);
 	INIT_WORK(&vmpr->work, vmpressure_work_fn);
 }
+
+/**
+ * vmpressure_cleanup() - shuts down vmpressure control structure
+ * @vmpr:	Structure to be cleaned up
+ *
+ * This function should be called before the structure in which it is
+ * embedded is cleaned up.
+ */
+void vmpressure_cleanup(struct vmpressure *vmpr)
+{
+	/*
+	 * Make sure there is no pending work before eventfd infrastructure
+	 * goes away.
+	 */
+	flush_work(&vmpr->work);
+}
diff --git a/mm/zbud.c b/mm/zbud.c
index 9bb4710..ad1e781 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -257,7 +257,7 @@
 
 	if (size <= 0 || gfp & __GFP_HIGHMEM)
 		return -EINVAL;
-	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED)
+	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
 		return -ENOSPC;
 	chunks = size_to_chunks(size);
 	spin_lock(&pool->lock);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 4a78c4d..6ee48aa 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -91,7 +91,12 @@
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
-	return vlan_dev_priv(dev)->real_dev;
+	struct net_device *ret = vlan_dev_priv(dev)->real_dev;
+
+	while (is_vlan_dev(ret))
+		ret = vlan_dev_priv(ret)->real_dev;
+
+	return ret;
 }
 EXPORT_SYMBOL(vlan_dev_real_dev);
 
diff --git a/net/Kconfig b/net/Kconfig
index 3770249..2b40660 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -244,7 +244,7 @@
 	  Cgroup subsystem for use in assigning processes to network priorities on
 	  a per-interface basis
 
-config NET_LL_RX_POLL
+config NET_RX_BUSY_POLL
 	boolean
 	default y
 
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index e14531f..264de88 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1529,6 +1529,8 @@
  * in these cases, the skb is further handled by this function and
  * returns 1, otherwise it returns 0 and the caller shall further
  * process the skb.
+ *
+ * This call might reallocate skb data.
  */
 int batadv_bla_tx(struct batadv_priv *bat_priv, struct sk_buff *skb,
 		  unsigned short vid)
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index f105219..7614af3 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -508,6 +508,7 @@
 	return 0;
 }
 
+/* this call might reallocate skb data */
 static bool batadv_is_type_dhcprequest(struct sk_buff *skb, int header_len)
 {
 	int ret = false;
@@ -568,6 +569,7 @@
 	return ret;
 }
 
+/* this call might reallocate skb data */
 bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len)
 {
 	struct ethhdr *ethhdr;
@@ -619,6 +621,12 @@
 
 	if (!pskb_may_pull(skb, *header_len + sizeof(*udphdr)))
 		return false;
+
+	/* skb->data might have been reallocated by pskb_may_pull() */
+	ethhdr = (struct ethhdr *)skb->data;
+	if (ntohs(ethhdr->h_proto) == ETH_P_8021Q)
+		ethhdr = (struct ethhdr *)(skb->data + VLAN_HLEN);
+
 	udphdr = (struct udphdr *)(skb->data + *header_len);
 	*header_len += sizeof(*udphdr);
 
@@ -634,12 +642,14 @@
 	return true;
 }
 
+/* this call might reallocate skb data */
 bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
-			    struct sk_buff *skb, struct ethhdr *ethhdr)
+			    struct sk_buff *skb)
 {
 	struct batadv_neigh_node *neigh_curr = NULL, *neigh_old = NULL;
 	struct batadv_orig_node *orig_dst_node = NULL;
 	struct batadv_gw_node *curr_gw = NULL;
+	struct ethhdr *ethhdr;
 	bool ret, out_of_range = false;
 	unsigned int header_len = 0;
 	uint8_t curr_tq_avg;
@@ -648,6 +658,7 @@
 	if (!ret)
 		goto out;
 
+	ethhdr = (struct ethhdr *)skb->data;
 	orig_dst_node = batadv_transtable_search(bat_priv, ethhdr->h_source,
 						 ethhdr->h_dest);
 	if (!orig_dst_node)
diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h
index 039902d..1037d75 100644
--- a/net/batman-adv/gateway_client.h
+++ b/net/batman-adv/gateway_client.h
@@ -34,7 +34,6 @@
 void batadv_gw_node_purge(struct batadv_priv *bat_priv);
 int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset);
 bool batadv_gw_is_dhcp_target(struct sk_buff *skb, unsigned int *header_len);
-bool batadv_gw_out_of_range(struct batadv_priv *bat_priv,
-			    struct sk_buff *skb, struct ethhdr *ethhdr);
+bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb);
 
 #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 700d0b4..0f04e1c 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -180,6 +180,9 @@
 	if (batadv_bla_tx(bat_priv, skb, vid))
 		goto dropped;
 
+	/* skb->data might have been reallocated by batadv_bla_tx() */
+	ethhdr = (struct ethhdr *)skb->data;
+
 	/* Register the client MAC in the transtable */
 	if (!is_multicast_ether_addr(ethhdr->h_source))
 		batadv_tt_local_add(soft_iface, ethhdr->h_source, skb->skb_iif);
@@ -220,6 +223,10 @@
 		default:
 			break;
 		}
+
+		/* reminder: ethhdr might have become unusable from here on
+		 * (batadv_gw_is_dhcp_target() might have reallocated skb data)
+		 */
 	}
 
 	/* ethernet packet should be broadcasted */
@@ -266,7 +273,7 @@
 	/* unicast packet */
 	} else {
 		if (atomic_read(&bat_priv->gw_mode) != BATADV_GW_MODE_OFF) {
-			ret = batadv_gw_out_of_range(bat_priv, skb, ethhdr);
+			ret = batadv_gw_out_of_range(bat_priv, skb);
 			if (ret)
 				goto dropped;
 		}
diff --git a/net/batman-adv/unicast.c b/net/batman-adv/unicast.c
index dc8b5d4..857e1b8 100644
--- a/net/batman-adv/unicast.c
+++ b/net/batman-adv/unicast.c
@@ -326,7 +326,9 @@
  * @skb: the skb containing the payload to encapsulate
  * @orig_node: the destination node
  *
- * Returns false if the payload could not be encapsulated or true otherwise
+ * Returns false if the payload could not be encapsulated or true otherwise.
+ *
+ * This call might reallocate skb data.
  */
 static bool batadv_unicast_prepare_skb(struct sk_buff *skb,
 				       struct batadv_orig_node *orig_node)
@@ -343,7 +345,9 @@
  * @orig_node: the destination node
  * @packet_subtype: the batman 4addr packet subtype to use
  *
- * Returns false if the payload could not be encapsulated or true otherwise
+ * Returns false if the payload could not be encapsulated or true otherwise.
+ *
+ * This call might reallocate skb data.
  */
 bool batadv_unicast_4addr_prepare_skb(struct batadv_priv *bat_priv,
 				      struct sk_buff *skb,
@@ -401,7 +405,7 @@
 	struct batadv_neigh_node *neigh_node;
 	int data_len = skb->len;
 	int ret = NET_RX_DROP;
-	unsigned int dev_mtu;
+	unsigned int dev_mtu, header_len;
 
 	/* get routing information */
 	if (is_multicast_ether_addr(ethhdr->h_dest)) {
@@ -428,11 +432,17 @@
 
 	switch (packet_type) {
 	case BATADV_UNICAST:
-		batadv_unicast_prepare_skb(skb, orig_node);
+		if (!batadv_unicast_prepare_skb(skb, orig_node))
+			goto out;
+
+		header_len = sizeof(struct batadv_unicast_packet);
 		break;
 	case BATADV_UNICAST_4ADDR:
-		batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
-						 packet_subtype);
+		if (!batadv_unicast_4addr_prepare_skb(bat_priv, skb, orig_node,
+						      packet_subtype))
+			goto out;
+
+		header_len = sizeof(struct batadv_unicast_4addr_packet);
 		break;
 	default:
 		/* this function supports UNICAST and UNICAST_4ADDR only. It
@@ -441,6 +451,7 @@
 		goto out;
 	}
 
+	ethhdr = (struct ethhdr *)(skb->data + header_len);
 	unicast_packet = (struct batadv_unicast_packet *)skb->data;
 
 	/* inform the destination node that we are still missing a correct route
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e3a3499..cc27297 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -513,7 +513,10 @@
 
 	hci_setup_event_mask(req);
 
-	if (hdev->hci_ver > BLUETOOTH_VER_1_1)
+	/* AVM Berlin (31), aka "BlueFRITZ!", doesn't support the read
+	 * local supported commands HCI command.
+	 */
+	if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1)
 		hci_req_add(req, HCI_OP_READ_LOCAL_COMMANDS, 0, NULL);
 
 	if (lmp_ssp_capable(hdev)) {
@@ -2165,10 +2168,6 @@
 
 	BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
 
-	write_lock(&hci_dev_list_lock);
-	list_add(&hdev->list, &hci_dev_list);
-	write_unlock(&hci_dev_list_lock);
-
 	hdev->workqueue = alloc_workqueue("%s", WQ_HIGHPRI | WQ_UNBOUND |
 					  WQ_MEM_RECLAIM, 1, hdev->name);
 	if (!hdev->workqueue) {
@@ -2203,6 +2202,10 @@
 	if (hdev->dev_type != HCI_AMP)
 		set_bit(HCI_AUTO_OFF, &hdev->dev_flags);
 
+	write_lock(&hci_dev_list_lock);
+	list_add(&hdev->list, &hci_dev_list);
+	write_unlock(&hci_dev_list_lock);
+
 	hci_notify(hdev, HCI_DEV_REG);
 	hci_dev_hold(hdev);
 
@@ -2215,9 +2218,6 @@
 	destroy_workqueue(hdev->req_workqueue);
 err:
 	ida_simple_remove(&hci_index_ida, hdev->id);
-	write_lock(&hci_dev_list_lock);
-	list_del(&hdev->list);
-	write_unlock(&hci_dev_list_lock);
 
 	return error;
 }
@@ -3399,8 +3399,16 @@
 	 */
 	if (hdev->sent_cmd) {
 		req_complete = bt_cb(hdev->sent_cmd)->req.complete;
-		if (req_complete)
+
+		if (req_complete) {
+			/* We must set the complete callback to NULL to
+			 * avoid calling the callback more than once if
+			 * this function gets called again.
+			 */
+			bt_cb(hdev->sent_cmd)->req.complete = NULL;
+
 			goto call_complete;
+		}
 	}
 
 	/* Remove all pending commands belonging to this request */
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 2ef6678..69363bd 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -70,7 +70,8 @@
 		}
 
 		mdst = br_mdb_get(br, skb, vid);
-		if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb))
+		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
+		    br_multicast_querier_exists(br))
 			br_multicast_deliver(mdst, skb);
 		else
 			br_flood_deliver(br, skb, false);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 60aca91..ffd5874 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -161,7 +161,7 @@
 	if (!pv)
 		return;
 
-	for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+	for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
 		f = __br_fdb_get(br, br->dev->dev_addr, vid);
 		if (f && f->is_local && !f->dst)
 			fdb_delete(br, f);
@@ -730,7 +730,7 @@
 		/* VID was specified, so use it. */
 		err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
 	} else {
-		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+		if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
 			err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
 			goto out;
 		}
@@ -739,7 +739,7 @@
 		 * specify a VLAN.  To be nice, add/update entry for every
 		 * vlan on this port.
 		 */
-		for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+		for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
 			err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
 			if (err)
 				goto out;
@@ -817,7 +817,7 @@
 
 		err = __br_fdb_delete(p, addr, vid);
 	} else {
-		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+		if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
 			err = __br_fdb_delete(p, addr, 0);
 			goto out;
 		}
@@ -827,7 +827,7 @@
 		 * vlan on this port.
 		 */
 		err = -ENOENT;
-		for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+		for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
 			err &= __br_fdb_delete(p, addr, vid);
 		}
 	}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 1b8b8b8..8c561c0 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -101,7 +101,8 @@
 		unicast = false;
 	} else if (is_multicast_ether_addr(dest)) {
 		mdst = br_mdb_get(br, skb, vid);
-		if (mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) {
+		if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
+		    br_multicast_querier_exists(br)) {
 			if ((mdst && mdst->mglist) ||
 			    br_multicast_is_router(br))
 				skb2 = skb;
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 69af490..08e576a 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -619,6 +619,9 @@
 	mp->br = br;
 	mp->addr = *group;
 
+	setup_timer(&mp->timer, br_multicast_group_expired,
+		    (unsigned long)mp);
+
 	hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]);
 	mdb->size++;
 
@@ -1011,6 +1014,16 @@
 }
 #endif
 
+static void br_multicast_update_querier_timer(struct net_bridge *br,
+					      unsigned long max_delay)
+{
+	if (!timer_pending(&br->multicast_querier_timer))
+		br->multicast_querier_delay_time = jiffies + max_delay;
+
+	mod_timer(&br->multicast_querier_timer,
+		  jiffies + br->multicast_querier_interval);
+}
+
 /*
  * Add port to router_list
  *  list is maintained ordered by pointer value
@@ -1061,11 +1074,11 @@
 
 static void br_multicast_query_received(struct net_bridge *br,
 					struct net_bridge_port *port,
-					int saddr)
+					int saddr,
+					unsigned long max_delay)
 {
 	if (saddr)
-		mod_timer(&br->multicast_querier_timer,
-			  jiffies + br->multicast_querier_interval);
+		br_multicast_update_querier_timer(br, max_delay);
 	else if (timer_pending(&br->multicast_querier_timer))
 		return;
 
@@ -1093,8 +1106,6 @@
 	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
-	br_multicast_query_received(br, port, !!iph->saddr);
-
 	group = ih->group;
 
 	if (skb->len == sizeof(*ih)) {
@@ -1118,6 +1129,8 @@
 			    IGMPV3_MRC(ih3->code) * (HZ / IGMP_TIMER_SCALE) : 1;
 	}
 
+	br_multicast_query_received(br, port, !!iph->saddr, max_delay);
+
 	if (!group)
 		goto out;
 
@@ -1126,7 +1139,6 @@
 	if (!mp)
 		goto out;
 
-	setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp);
 	mod_timer(&mp->timer, now + br->multicast_membership_interval);
 	mp->timer_armed = true;
 
@@ -1174,8 +1186,6 @@
 	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
-	br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr));
-
 	if (skb->len == sizeof(*mld)) {
 		if (!pskb_may_pull(skb, sizeof(*mld))) {
 			err = -EINVAL;
@@ -1185,7 +1195,7 @@
 		max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay));
 		if (max_delay)
 			group = &mld->mld_mca;
-	} else if (skb->len >= sizeof(*mld2q)) {
+	} else {
 		if (!pskb_may_pull(skb, sizeof(*mld2q))) {
 			err = -EINVAL;
 			goto out;
@@ -1196,6 +1206,9 @@
 		max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1;
 	}
 
+	br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr),
+				    max_delay);
+
 	if (!group)
 		goto out;
 
@@ -1204,7 +1217,6 @@
 	if (!mp)
 		goto out;
 
-	setup_timer(&mp->timer, br_multicast_group_expired, (unsigned long)mp);
 	mod_timer(&mp->timer, now + br->multicast_membership_interval);
 	mp->timer_armed = true;
 
@@ -1642,6 +1654,8 @@
 	br->multicast_querier_interval = 255 * HZ;
 	br->multicast_membership_interval = 260 * HZ;
 
+	br->multicast_querier_delay_time = 0;
+
 	spin_lock_init(&br->multicast_lock);
 	setup_timer(&br->multicast_router_timer,
 		    br_multicast_local_router_expired, 0);
@@ -1830,6 +1844,8 @@
 
 int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
 {
+	unsigned long max_delay;
+
 	val = !!val;
 
 	spin_lock_bh(&br->multicast_lock);
@@ -1837,8 +1853,14 @@
 		goto unlock;
 
 	br->multicast_querier = val;
-	if (val)
-		br_multicast_start_querier(br);
+	if (!val)
+		goto unlock;
+
+	max_delay = br->multicast_query_response_interval;
+	if (!timer_pending(&br->multicast_querier_timer))
+		br->multicast_querier_delay_time = jiffies + max_delay;
+
+	br_multicast_start_querier(br);
 
 unlock:
 	spin_unlock_bh(&br->multicast_lock);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 1fc30ab..b9259ef 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -132,7 +132,7 @@
 		else
 			pv = br_get_vlan_info(br);
 
-		if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN))
+		if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID))
 			goto done;
 
 		af = nla_nest_start(skb, IFLA_AF_SPEC);
@@ -140,7 +140,7 @@
 			goto nla_put_failure;
 
 		pvid = br_get_pvid(pv);
-		for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+		for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
 			vinfo.vid = vid;
 			vinfo.flags = 0;
 			if (vid == pvid)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3be89b3..2f7da41 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -267,6 +267,7 @@
 	unsigned long			multicast_query_interval;
 	unsigned long			multicast_query_response_interval;
 	unsigned long			multicast_startup_query_interval;
+	unsigned long			multicast_querier_delay_time;
 
 	spinlock_t			multicast_lock;
 	struct net_bridge_mdb_htable __rcu *mdb;
@@ -501,6 +502,13 @@
 	       (br->multicast_router == 1 &&
 		timer_pending(&br->multicast_router_timer));
 }
+
+static inline bool br_multicast_querier_exists(struct net_bridge *br)
+{
+	return time_is_before_jiffies(br->multicast_querier_delay_time) &&
+	       (br->multicast_querier ||
+		timer_pending(&br->multicast_querier_timer));
+}
 #else
 static inline int br_multicast_rcv(struct net_bridge *br,
 				   struct net_bridge_port *port,
@@ -557,6 +565,10 @@
 {
 	return 0;
 }
+static inline bool br_multicast_querier_exists(struct net_bridge *br)
+{
+	return false;
+}
 static inline void br_mdb_init(void)
 {
 }
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 394bb96..3b9637f 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -1,5 +1,5 @@
 /*
- *	Sysfs attributes of bridge ports
+ *	Sysfs attributes of bridge
  *	Linux ethernet bridge
  *
  *	Authors:
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bd58b45..9a9ffe7 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -108,7 +108,7 @@
 
 	clear_bit(vid, v->vlan_bitmap);
 	v->num_vlans--;
-	if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+	if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) {
 		if (v->port_idx)
 			rcu_assign_pointer(v->parent.port->vlan_info, NULL);
 		else
@@ -122,7 +122,7 @@
 {
 	smp_wmb();
 	v->pvid = 0;
-	bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+	bitmap_zero(v->vlan_bitmap, VLAN_N_VID);
 	if (v->port_idx)
 		rcu_assign_pointer(v->parent.port->vlan_info, NULL);
 	else
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 00ee068..b84a1b1 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -65,6 +65,7 @@
 		nhoff += sizeof(struct ipv6hdr);
 		break;
 	}
+	case __constant_htons(ETH_P_8021AD):
 	case __constant_htons(ETH_P_8021Q): {
 		const struct vlan_hdr *vlan;
 		struct vlan_hdr _vlan;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b7de821..60533db 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1441,16 +1441,18 @@
 		atomic_set(&p->refcnt, 1);
 		p->reachable_time =
 				neigh_rand_reach_time(p->base_reachable_time);
-
-		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
-			kfree(p);
-			return NULL;
-		}
-
 		dev_hold(dev);
 		p->dev = dev;
 		write_pnet(&p->net, hold_net(net));
 		p->sysctl_table = NULL;
+
+		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
+			release_net(net);
+			dev_put(dev);
+			kfree(p);
+			return NULL;
+		}
+
 		write_lock_bh(&tbl->lock);
 		p->next		= tbl->parms.next;
 		tbl->parms.next = p;
@@ -2767,6 +2769,7 @@
 
 #ifdef CONFIG_SYSCTL
 static int zero;
+static int int_max = INT_MAX;
 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
 
 static int proc_unres_qlen(struct ctl_table *ctl, int write,
@@ -2819,19 +2822,25 @@
 			.procname	= "mcast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1 	= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_UCAST_PROBE] = {
 			.procname	= "ucast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1 	= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_APP_PROBE] = {
 			.procname	= "app_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1 	= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_RETRANS_TIME] = {
 			.procname	= "retrans_time",
@@ -2874,7 +2883,9 @@
 			.procname	= "proxy_qlen",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1 	= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_ANYCAST_DELAY] = {
 			.procname	= "anycast_delay",
@@ -2916,19 +2927,25 @@
 			.procname	= "gc_thresh1",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1 	= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_GC_THRESH2] = {
 			.procname	= "gc_thresh2",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1 	= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		[NEIGH_VAR_GC_THRESH3] = {
 			.procname	= "gc_thresh3",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
-			.proc_handler	= proc_dointvec,
+			.extra1 	= &zero,
+			.extra2		= &int_max,
+			.proc_handler	= proc_dointvec_minmax,
 		},
 		{},
 	},
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3de7408..ca198c1 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2156,7 +2156,7 @@
 	/* If aging addresses are supported device will need to
 	 * implement its own handler for this.
 	 */
-	if (ndm->ndm_state & NUD_PERMANENT) {
+	if (!(ndm->ndm_state & NUD_PERMANENT)) {
 		pr_info("%s: FDB only supports static addresses\n", dev->name);
 		return -EINVAL;
 	}
@@ -2384,7 +2384,7 @@
 	struct nlattr *extfilt;
 	u32 filter_mask = 0;
 
-	extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg),
+	extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg),
 				  IFLA_EXT_MASK);
 	if (extfilt)
 		filter_mask = nla_get_u32(extfilt);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 20e02d2..2c3d0f5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -309,7 +309,8 @@
  * @frag_size: size of fragment, or 0 if head was kmalloced
  *
  * Allocate a new &sk_buff. Caller provides space holding head and
- * skb_shared_info. @data must have been allocated by kmalloc()
+ * skb_shared_info. @data must have been allocated by kmalloc() only if
+ * @frag_size is 0, otherwise data should come from the page allocator.
  * The return is the new skb buffer.
  * On a failure the return is %NULL, and @data is not freed.
  * Notes :
@@ -739,7 +740,7 @@
 
 	skb_copy_secmark(new, old);
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	new->napi_id	= old->napi_id;
 #endif
 }
diff --git a/net/core/sock.c b/net/core/sock.c
index 548d716..2c097c5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -900,7 +900,7 @@
 		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
 		break;
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	case SO_BUSY_POLL:
 		/* allow unprivileged users to decrease the value */
 		if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
@@ -1170,7 +1170,7 @@
 		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
 		break;
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	case SO_BUSY_POLL:
 		v.val = sk->sk_ll_usec;
 		break;
@@ -2292,7 +2292,7 @@
 
 	sk->sk_stamp = ktime_set(-1L, 0);
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	sk->sk_napi_id		=	0;
 	sk->sk_ll_usec		=	sysctl_net_busy_read;
 #endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 6609686..31107ab 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -21,7 +21,9 @@
 #include <net/net_ratelimit.h>
 #include <net/busy_poll.h>
 
+static int zero = 0;
 static int one = 1;
+static int ushort_max = USHRT_MAX;
 
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
@@ -298,7 +300,7 @@
 		.proc_handler	= flow_limit_table_len_sysctl
 	},
 #endif /* CONFIG_NET_FLOW_LIMIT */
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 	{
 		.procname	= "busy_poll",
 		.data		= &sysctl_net_busy_poll,
@@ -339,7 +341,9 @@
 		.data		= &init_net.core.sysctl_somaxconn,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.extra1		= &zero,
+		.extra2		= &ushort_max,
+		.proc_handler	= proc_dointvec_minmax
 	},
 	{ }
 };
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 8d48c39..34ca6d5 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -772,7 +772,7 @@
 		ci = nla_data(tb[IFA_CACHEINFO]);
 		if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
 			err = -EINVAL;
-			goto errout;
+			goto errout_free;
 		}
 		*pvalid_lft = ci->ifa_valid;
 		*pprefered_lft = ci->ifa_prefered;
@@ -780,6 +780,8 @@
 
 	return ifa;
 
+errout_free:
+	inet_free_ifa(ifa);
 errout:
 	return ERR_PTR(err);
 }
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index ab3d814..109ee89 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -477,7 +477,7 @@
 	}
 
 	return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
-		 net_adj) & ~(align - 1)) + (net_adj - 2);
+		 net_adj) & ~(align - 1)) + net_adj - 2;
 }
 
 static void esp4_err(struct sk_buff *skb, u32 info)
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 49616fe..3df6d3e 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -71,7 +71,6 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <linux/prefetch.h>
 #include <linux/export.h>
 #include <net/net_namespace.h>
 #include <net/ip.h>
@@ -1761,10 +1760,8 @@
 			if (!c)
 				continue;
 
-			if (IS_LEAF(c)) {
-				prefetch(rcu_dereference_rtnl(p->child[idx]));
+			if (IS_LEAF(c))
 				return (struct leaf *) c;
-			}
 
 			/* Rescan start scanning in new node */
 			p = (struct tnode *) c;
@@ -2133,7 +2130,7 @@
 		max--;
 
 	pointers = 0;
-	for (i = 1; i <= max; i++)
+	for (i = 1; i < max; i++)
 		if (stat->nodesizes[i] != 0) {
 			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
 			pointers += (1<<i) * stat->nodesizes[i];
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 1f6eab6..8d6939e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -383,7 +383,7 @@
 	if (daddr)
 		memcpy(&iph->daddr, daddr, 4);
 	if (iph->daddr)
-		return t->hlen;
+		return t->hlen + sizeof(*iph);
 
 	return -(t->hlen + sizeof(*iph));
 }
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 7167b08..850525b 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -76,9 +76,7 @@
 	iph->daddr	=	dst;
 	iph->saddr	=	src;
 	iph->ttl	=	ttl;
-	tunnel_ip_select_ident(skb,
-			       (const struct iphdr *)skb_inner_network_header(skb),
-			       &rt->dst);
+	__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 
 	err = ip_local_out(skb);
 	if (unlikely(net_xmit_eval(err)))
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 6577a11..463bd12 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -273,7 +273,7 @@
 	SNMP_MIB_ITEM("TCPFastOpenListenOverflow", LINUX_MIB_TCPFASTOPENLISTENOVERFLOW),
 	SNMP_MIB_ITEM("TCPFastOpenCookieReqd", LINUX_MIB_TCPFASTOPENCOOKIEREQD),
 	SNMP_MIB_ITEM("TCPSpuriousRtxHostQueues", LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES),
-	SNMP_MIB_ITEM("LowLatencyRxPackets", LINUX_MIB_LOWLATENCYRXPACKETS),
+	SNMP_MIB_ITEM("BusyPollRxPackets", LINUX_MIB_BUSYPOLLRXPACKETS),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index b2c123c..610e324 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -36,6 +36,8 @@
 static int tcp_adv_win_scale_max = 31;
 static int ip_ttl_min = 1;
 static int ip_ttl_max = 255;
+static int tcp_syn_retries_min = 1;
+static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
 static int ip_ping_group_range_min[] = { 0, 0 };
 static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
 
@@ -332,7 +334,9 @@
 		.data		= &sysctl_tcp_syn_retries,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &tcp_syn_retries_min,
+		.extra2		= &tcp_syn_retries_max
 	},
 	{
 		.procname	= "tcp_synack_retries",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5423223..b2f6c74 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1121,6 +1121,13 @@
 					goto wait_for_memory;
 
 				/*
+				 * All packets are restored as if they have
+				 * already been sent.
+				 */
+				if (tp->repair)
+					TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+				/*
 				 * Check whether we can use HW checksum.
 				 */
 				if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index a9077f4..b6ae92a 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -206,8 +206,8 @@
  */
 static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 {
-	u64 offs;
-	u32 delta, t, bic_target, max_cnt;
+	u32 delta, bic_target, max_cnt;
+	u64 offs, t;
 
 	ca->ack_cnt++;	/* count the number of ACKs */
 
@@ -250,9 +250,11 @@
 	 * if the cwnd < 1 million packets !!!
 	 */
 
+	t = (s32)(tcp_time_stamp - ca->epoch_start);
+	t += msecs_to_jiffies(ca->delay_min >> 3);
 	/* change the unit from HZ to bictcp_HZ */
-	t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
-	      - ca->epoch_start) << BICTCP_HZ) / HZ;
+	t <<= BICTCP_HZ;
+	do_div(t, HZ);
 
 	if (t < ca->bic_K)		/* t - K */
 		offs = ca->bic_K - t;
@@ -414,7 +416,7 @@
 		return;
 
 	/* Discard delay samples right after fast recovery */
-	if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+	if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
 		return;
 
 	delay = (rtt_us << 3) / USEC_PER_MSEC;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cfdcf7b..498ea99 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -813,8 +813,9 @@
 /* On success it returns ifp with increased reference count */
 
 static struct inet6_ifaddr *
-ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
-	      int scope, u32 flags)
+ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
+	      const struct in6_addr *peer_addr, int pfxlen,
+	      int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
 {
 	struct inet6_ifaddr *ifa = NULL;
 	struct rt6_info *rt;
@@ -863,6 +864,8 @@
 	}
 
 	ifa->addr = *addr;
+	if (peer_addr)
+		ifa->peer_addr = *peer_addr;
 
 	spin_lock_init(&ifa->lock);
 	spin_lock_init(&ifa->state_lock);
@@ -872,6 +875,8 @@
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
 	ifa->flags = flags | IFA_F_TENTATIVE;
+	ifa->valid_lft = valid_lft;
+	ifa->prefered_lft = prefered_lft;
 	ifa->cstamp = ifa->tstamp = jiffies;
 	ifa->tokenized = false;
 
@@ -1121,11 +1126,10 @@
 	if (ifp->flags & IFA_F_OPTIMISTIC)
 		addr_flags |= IFA_F_OPTIMISTIC;
 
-	ift = !max_addresses ||
-	      ipv6_count_addresses(idev) < max_addresses ?
-		ipv6_add_addr(idev, &addr, tmp_plen, ipv6_addr_scope(&addr),
-			      addr_flags) : NULL;
-	if (IS_ERR_OR_NULL(ift)) {
+	ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
+			    ipv6_addr_scope(&addr), addr_flags,
+			    tmp_valid_lft, tmp_prefered_lft);
+	if (IS_ERR(ift)) {
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
 		pr_info("%s: retry temporary address regeneration\n", __func__);
@@ -1136,8 +1140,6 @@
 
 	spin_lock_bh(&ift->lock);
 	ift->ifpub = ifp;
-	ift->valid_lft = tmp_valid_lft;
-	ift->prefered_lft = tmp_prefered_lft;
 	ift->cstamp = now;
 	ift->tstamp = tmp_tstamp;
 	spin_unlock_bh(&ift->lock);
@@ -2179,16 +2181,19 @@
 			 */
 			if (!max_addresses ||
 			    ipv6_count_addresses(in6_dev) < max_addresses)
-				ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+				ifp = ipv6_add_addr(in6_dev, &addr, NULL,
+						    pinfo->prefix_len,
 						    addr_type&IPV6_ADDR_SCOPE_MASK,
-						    addr_flags);
+						    addr_flags, valid_lft,
+						    prefered_lft);
 
 			if (IS_ERR_OR_NULL(ifp)) {
 				in6_dev_put(in6_dev);
 				return;
 			}
 
-			update_lft = create = 1;
+			update_lft = 0;
+			create = 1;
 			ifp->cstamp = jiffies;
 			ifp->tokenized = tokenized;
 			addrconf_dad_start(ifp);
@@ -2209,7 +2214,7 @@
 				stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
 			else
 				stored_lft = 0;
-			if (!update_lft && stored_lft) {
+			if (!update_lft && !create && stored_lft) {
 				if (valid_lft > MIN_VALID_LIFETIME ||
 				    valid_lft > stored_lft)
 					update_lft = 1;
@@ -2455,17 +2460,10 @@
 		prefered_lft = timeout;
 	}
 
-	ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
+	ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
+			    valid_lft, prefered_lft);
 
 	if (!IS_ERR(ifp)) {
-		spin_lock_bh(&ifp->lock);
-		ifp->valid_lft = valid_lft;
-		ifp->prefered_lft = prefered_lft;
-		ifp->tstamp = jiffies;
-		if (peer_pfx)
-			ifp->peer_addr = *peer_pfx;
-		spin_unlock_bh(&ifp->lock);
-
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
 				      expires, flags);
 		/*
@@ -2557,7 +2555,8 @@
 {
 	struct inet6_ifaddr *ifp;
 
-	ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT);
+	ifp = ipv6_add_addr(idev, addr, NULL, plen,
+			    scope, IFA_F_PERMANENT, 0, 0);
 	if (!IS_ERR(ifp)) {
 		spin_lock_bh(&ifp->lock);
 		ifp->flags &= ~IFA_F_TENTATIVE;
@@ -2683,7 +2682,7 @@
 #endif
 
 
-	ifp = ipv6_add_addr(idev, addr, 64, IFA_LINK, addr_flags);
+	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0);
 	if (!IS_ERR(ifp)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
 		addrconf_dad_start(ifp);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 40ffd72..aeac0dc 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -425,7 +425,7 @@
 		net_adj = 0;
 
 	return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) -
-		 net_adj) & ~(align - 1)) + (net_adj - 2);
+		 net_adj) & ~(align - 1)) + net_adj - 2;
 }
 
 static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 5fc9c7a..c4ff5bb 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -993,14 +993,22 @@
 
 			if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
 #ifdef CONFIG_IPV6_SUBTREES
-				if (fn->subtree)
-					fn = fib6_lookup_1(fn->subtree, args + 1);
+				if (fn->subtree) {
+					struct fib6_node *sfn;
+					sfn = fib6_lookup_1(fn->subtree,
+							    args + 1);
+					if (!sfn)
+						goto backtrack;
+					fn = sfn;
+				}
 #endif
-				if (!fn || fn->fn_flags & RTN_RTINFO)
+				if (fn->fn_flags & RTN_RTINFO)
 					return fn;
 			}
 		}
-
+#ifdef CONFIG_IPV6_SUBTREES
+backtrack:
+#endif
 		if (fn->fn_flags & RTN_ROOT)
 			break;
 
@@ -1632,27 +1640,28 @@
 
 static DEFINE_SPINLOCK(fib6_gc_lock);
 
-void fib6_run_gc(unsigned long expires, struct net *net)
+void fib6_run_gc(unsigned long expires, struct net *net, bool force)
 {
-	if (expires != ~0UL) {
+	unsigned long now;
+
+	if (force) {
 		spin_lock_bh(&fib6_gc_lock);
-		gc_args.timeout = expires ? (int)expires :
-			net->ipv6.sysctl.ip6_rt_gc_interval;
-	} else {
-		if (!spin_trylock_bh(&fib6_gc_lock)) {
-			mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
-			return;
-		}
-		gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
+	} else if (!spin_trylock_bh(&fib6_gc_lock)) {
+		mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
+		return;
 	}
+	gc_args.timeout = expires ? (int)expires :
+			  net->ipv6.sysctl.ip6_rt_gc_interval;
 
 	gc_args.more = icmp6_dst_gc();
 
 	fib6_clean_all(net, fib6_age, 0, NULL);
+	now = jiffies;
+	net->ipv6.ip6_rt_last_gc = now;
 
 	if (gc_args.more)
 		mod_timer(&net->ipv6.ip6_fib_timer,
-			  round_jiffies(jiffies
+			  round_jiffies(now
 					+ net->ipv6.sysctl.ip6_rt_gc_interval));
 	else
 		del_timer(&net->ipv6.ip6_fib_timer);
@@ -1661,7 +1670,7 @@
 
 static void fib6_gc_timer_cb(unsigned long arg)
 {
-	fib6_run_gc(0, (struct net *)arg);
+	fib6_run_gc(0, (struct net *)arg, true);
 }
 
 static int __net_init fib6_net_init(struct net *net)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 583e8d4..03986d3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -259,10 +259,12 @@
 {
 	struct mr6_table *mrt, *next;
 
+	rtnl_lock();
 	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
 		list_del(&mrt->list);
 		ip6mr_free_table(mrt);
 	}
+	rtnl_unlock();
 	fib_rules_unregister(net->ipv6.mr6_rules_ops);
 }
 #else
@@ -289,7 +291,10 @@
 
 static void __net_exit ip6mr_rules_exit(struct net *net)
 {
+	rtnl_lock();
 	ip6mr_free_table(net->ipv6.mrt6);
+	net->ipv6.mrt6 = NULL;
+	rtnl_unlock();
 }
 #endif
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 24c03396..04d31c2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1369,8 +1369,10 @@
 	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts))
 		return;
 
-	if (!ndopts.nd_opts_rh)
+	if (!ndopts.nd_opts_rh) {
+		ip6_redirect_no_header(skb, dev_net(skb->dev), 0, 0);
 		return;
+	}
 
 	hdr = (u8 *)ndopts.nd_opts_rh;
 	hdr += 8;
@@ -1576,7 +1578,7 @@
 	switch (event) {
 	case NETDEV_CHANGEADDR:
 		neigh_changeaddr(&nd_tbl, dev);
-		fib6_run_gc(~0UL, net);
+		fib6_run_gc(0, net, false);
 		idev = in6_dev_get(dev);
 		if (!idev)
 			break;
@@ -1586,7 +1588,7 @@
 		break;
 	case NETDEV_DOWN:
 		neigh_ifdown(&nd_tbl, dev);
-		fib6_run_gc(~0UL, net);
+		fib6_run_gc(0, net, false);
 		break;
 	case NETDEV_NOTIFY_PEERS:
 		ndisc_send_unsol_na(dev);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 790d9f4..1aeb473 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -490,6 +490,7 @@
 	ipv6_hdr(head)->payload_len = htons(payload_len);
 	ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
 	IP6CB(head)->nhoff = nhoff;
+	IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
 
 	/* Yes, and fold redundant checksum back. 8) */
 	if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -524,6 +525,9 @@
 	struct net *net = dev_net(skb_dst(skb)->dev);
 	int evicted;
 
+	if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+		goto fail_hdr;
+
 	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
 
 	/* Jumbo payload inhibits frag. header */
@@ -544,6 +548,7 @@
 				 ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
 
 		IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+		IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
 		return 1;
 	}
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a8c891a..8d9a93e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1178,6 +1178,27 @@
 }
 EXPORT_SYMBOL_GPL(ip6_redirect);
 
+void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
+			    u32 mark)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	const struct rd_msg *msg = (struct rd_msg *)icmp6_hdr(skb);
+	struct dst_entry *dst;
+	struct flowi6 fl6;
+
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_oif = oif;
+	fl6.flowi6_mark = mark;
+	fl6.flowi6_flags = 0;
+	fl6.daddr = msg->dest;
+	fl6.saddr = iph->daddr;
+
+	dst = ip6_route_output(net, NULL, &fl6);
+	if (!dst->error)
+		rt6_do_redirect(dst, NULL, skb);
+	dst_release(dst);
+}
+
 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
 {
 	ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark);
@@ -1311,7 +1332,6 @@
 
 static int ip6_dst_gc(struct dst_ops *ops)
 {
-	unsigned long now = jiffies;
 	struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
 	int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
 	int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
@@ -1321,13 +1341,12 @@
 	int entries;
 
 	entries = dst_entries_get_fast(ops);
-	if (time_after(rt_last_gc + rt_min_interval, now) &&
+	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
 	    entries <= rt_max_size)
 		goto out;
 
 	net->ipv6.ip6_rt_gc_expire++;
-	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
-	net->ipv6.ip6_rt_last_gc = now;
+	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
 	entries = dst_entries_get_slow(ops);
 	if (entries < ops->gc_thresh)
 		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
@@ -2827,7 +2846,7 @@
 	net = (struct net *)ctl->extra1;
 	delay = net->ipv6.sysctl.flush_delay;
 	proc_dointvec(ctl, write, buffer, lenp, ppos);
-	fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+	fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
 	return 0;
 }
 
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9da8620..ab8bd2c 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2081,6 +2081,7 @@
 			pol->sadb_x_policy_type = IPSEC_POLICY_NONE;
 	}
 	pol->sadb_x_policy_dir = dir+1;
+	pol->sadb_x_policy_reserved = 0;
 	pol->sadb_x_policy_id = xp->index;
 	pol->sadb_x_policy_priority = xp->priority;
 
@@ -3137,7 +3138,9 @@
 	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
 	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
 	pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1;
+	pol->sadb_x_policy_reserved = 0;
 	pol->sadb_x_policy_id = xp->index;
+	pol->sadb_x_policy_priority = xp->priority;
 
 	/* Set sadb_comb's. */
 	if (x->id.proto == IPPROTO_AH)
@@ -3525,6 +3528,7 @@
 	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
 	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
 	pol->sadb_x_policy_dir = dir + 1;
+	pol->sadb_x_policy_reserved = 0;
 	pol->sadb_x_policy_id = 0;
 	pol->sadb_x_policy_priority = 0;
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8184d12..43dd752 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -666,6 +666,8 @@
 			if (sta->sdata->dev != dev)
 				continue;
 
+			sinfo.filled = 0;
+			sta_set_sinfo(sta, &sinfo);
 			i = 0;
 			ADD_STA_STATS(sta);
 		}
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 3b7bfc0..22290a9 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -229,6 +229,10 @@
 	enum nl80211_mesh_power_mode pm;
 	bool do_buffer;
 
+	/* For non-assoc STA, prevent buffering or frame transmission */
+	if (sta->sta_state < IEEE80211_STA_ASSOC)
+		return;
+
 	/*
 	 * use peer-specific power mode if peering is established and the
 	 * peer's power mode is known
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index ae31968..cc9e02d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -31,10 +31,12 @@
 #include "led.h"
 
 #define IEEE80211_AUTH_TIMEOUT		(HZ / 5)
+#define IEEE80211_AUTH_TIMEOUT_LONG	(HZ / 2)
 #define IEEE80211_AUTH_TIMEOUT_SHORT	(HZ / 10)
 #define IEEE80211_AUTH_MAX_TRIES	3
 #define IEEE80211_AUTH_WAIT_ASSOC	(HZ * 5)
 #define IEEE80211_ASSOC_TIMEOUT		(HZ / 5)
+#define IEEE80211_ASSOC_TIMEOUT_LONG	(HZ / 2)
 #define IEEE80211_ASSOC_TIMEOUT_SHORT	(HZ / 10)
 #define IEEE80211_ASSOC_MAX_TRIES	3
 
@@ -209,8 +211,9 @@
 			     struct ieee80211_channel *channel,
 			     const struct ieee80211_ht_operation *ht_oper,
 			     const struct ieee80211_vht_operation *vht_oper,
-			     struct cfg80211_chan_def *chandef, bool verbose)
+			     struct cfg80211_chan_def *chandef, bool tracking)
 {
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct cfg80211_chan_def vht_chandef;
 	u32 ht_cfreq, ret;
 
@@ -229,7 +232,7 @@
 	ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
 						  channel->band);
 	/* check that channel matches the right operating channel */
-	if (channel->center_freq != ht_cfreq) {
+	if (!tracking && channel->center_freq != ht_cfreq) {
 		/*
 		 * It's possible that some APs are confused here;
 		 * Netgear WNDR3700 sometimes reports 4 higher than
@@ -237,11 +240,10 @@
 		 * since we look at probe response/beacon data here
 		 * it should be OK.
 		 */
-		if (verbose)
-			sdata_info(sdata,
-				   "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
-				   channel->center_freq, ht_cfreq,
-				   ht_oper->primary_chan, channel->band);
+		sdata_info(sdata,
+			   "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
+			   channel->center_freq, ht_cfreq,
+			   ht_oper->primary_chan, channel->band);
 		ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
 		goto out;
 	}
@@ -295,7 +297,7 @@
 				channel->band);
 		break;
 	default:
-		if (verbose)
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
 				   "AP VHT operation IE has invalid channel width (%d), disable VHT\n",
 				   vht_oper->chan_width);
@@ -304,7 +306,7 @@
 	}
 
 	if (!cfg80211_chandef_valid(&vht_chandef)) {
-		if (verbose)
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
 				   "AP VHT information is invalid, disable VHT\n");
 		ret = IEEE80211_STA_DISABLE_VHT;
@@ -317,7 +319,7 @@
 	}
 
 	if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
-		if (verbose)
+		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
 				   "AP VHT information doesn't match HT, disable VHT\n");
 		ret = IEEE80211_STA_DISABLE_VHT;
@@ -333,18 +335,27 @@
 	if (ret & IEEE80211_STA_DISABLE_VHT)
 		vht_chandef = *chandef;
 
+	/*
+	 * Ignore the DISABLED flag when we're already connected and only
+	 * tracking the APs beacon for bandwidth changes - otherwise we
+	 * might get disconnected here if we connect to an AP, update our
+	 * regulatory information based on the AP's country IE and the
+	 * information we have is wrong/outdated and disables the channel
+	 * that we're actually using for the connection to the AP.
+	 */
 	while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
-					IEEE80211_CHAN_DISABLED)) {
+					tracking ? 0 :
+						   IEEE80211_CHAN_DISABLED)) {
 		if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
 			ret = IEEE80211_STA_DISABLE_HT |
 			      IEEE80211_STA_DISABLE_VHT;
-			goto out;
+			break;
 		}
 
 		ret |= chandef_downgrade(chandef);
 	}
 
-	if (chandef->width != vht_chandef.width && verbose)
+	if (chandef->width != vht_chandef.width && !tracking)
 		sdata_info(sdata,
 			   "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n");
 
@@ -384,7 +395,7 @@
 
 	/* calculate new channel (type) based on HT/VHT operation IEs */
 	flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper,
-					     vht_oper, &chandef, false);
+					     vht_oper, &chandef, true);
 
 	/*
 	 * Downgrade the new channel if we associated with restricted
@@ -3394,10 +3405,13 @@
 
 	if (tx_flags == 0) {
 		auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
-		ifmgd->auth_data->timeout_started = true;
+		auth_data->timeout_started = true;
 		run_again(sdata, auth_data->timeout);
 	} else {
-		auth_data->timeout_started = false;
+		auth_data->timeout =
+			round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG);
+		auth_data->timeout_started = true;
+		run_again(sdata, auth_data->timeout);
 	}
 
 	return 0;
@@ -3434,7 +3448,11 @@
 		assoc_data->timeout_started = true;
 		run_again(sdata, assoc_data->timeout);
 	} else {
-		assoc_data->timeout_started = false;
+		assoc_data->timeout =
+			round_jiffies_up(jiffies +
+					 IEEE80211_ASSOC_TIMEOUT_LONG);
+		assoc_data->timeout_started = true;
+		run_again(sdata, assoc_data->timeout);
 	}
 
 	return 0;
@@ -3829,7 +3847,7 @@
 	ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
 						     cbss->channel,
 						     ht_oper, vht_oper,
-						     &chandef, true);
+						     &chandef, false);
 
 	sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
 				      local->rx_chains);
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 7fc5d0d..3401262 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -99,10 +99,13 @@
 	}
 	mutex_unlock(&local->sta_mtx);
 
-	/* remove all interfaces */
+	/* remove all interfaces that were created in the driver */
 	list_for_each_entry(sdata, &local->interfaces, list) {
-		if (!ieee80211_sdata_running(sdata))
+		if (!ieee80211_sdata_running(sdata) ||
+		    sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+		    sdata->vif.type == NL80211_IFTYPE_MONITOR)
 			continue;
+
 		drv_remove_interface(local, sdata);
 	}
 
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index ac7ef54..e6512e2 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -290,7 +290,7 @@
 	struct minstrel_rate *msr, *mr;
 	unsigned int ndx;
 	bool mrr_capable;
-	bool prev_sample = mi->prev_sample;
+	bool prev_sample;
 	int delta;
 	int sampling_ratio;
 
@@ -314,6 +314,7 @@
 			(mi->sample_count + mi->sample_deferred / 2);
 
 	/* delta < 0: no sampling required */
+	prev_sample = mi->prev_sample;
 	mi->prev_sample = false;
 	if (delta < 0 || (!mrr_capable && prev_sample))
 		return;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 5b2d301..f5aed96 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -804,10 +804,18 @@
 
 	sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES];
 	info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+	rate->count = 1;
+
+	if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) {
+		int idx = sample_idx % ARRAY_SIZE(mp->cck_rates);
+		rate->idx = mp->cck_rates[idx];
+		rate->flags = 0;
+		return;
+	}
+
 	rate->idx = sample_idx % MCS_GROUP_RATES +
 		    (sample_group->streams - 1) * MCS_GROUP_RATES;
 	rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags;
-	rate->count = 1;
 }
 
 static void
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 23dbcfc..2c5a79b 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -936,8 +936,14 @@
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
 
-	/* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */
-	if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) {
+	/*
+	 * Drop duplicate 802.11 retransmissions
+	 * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery")
+	 */
+	if (rx->skb->len >= 24 && rx->sta &&
+	    !ieee80211_is_ctl(hdr->frame_control) &&
+	    !ieee80211_is_qos_nullfunc(hdr->frame_control) &&
+	    !is_multicast_ether_addr(hdr->addr1)) {
 		if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
 			     rx->sta->last_seq_ctrl[rx->seqno_idx] ==
 			     hdr->seq_ctrl)) {
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index c63b618..4fd1ca9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -293,6 +293,11 @@
 		       sizeof(exp->tuple.dst.u3) - len);
 
 	exp->tuple.dst.u.all = *dst;
+
+#ifdef CONFIG_NF_NAT_NEEDED
+	memset(&exp->saved_addr, 0, sizeof(exp->saved_addr));
+	memset(&exp->saved_proto, 0, sizeof(exp->saved_proto));
+#endif
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
 
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 7dcc376..2f80107 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -526,7 +526,7 @@
 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
 	__u32 seq, ack, sack, end, win, swin;
 	s16 receiver_offset;
-	bool res;
+	bool res, in_recv_win;
 
 	/*
 	 * Get the required data from the packet.
@@ -649,14 +649,18 @@
 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
 		 receiver->td_scale);
 
+	/* Is the ending sequence in the receive window (if available)? */
+	in_recv_win = !receiver->td_maxwin ||
+		      after(end, sender->td_end - receiver->td_maxwin - 1);
+
 	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
 		 before(seq, sender->td_maxend + 1),
-		 after(end, sender->td_end - receiver->td_maxwin - 1),
+		 (in_recv_win ? 1 : 0),
 		 before(sack, receiver->td_end + 1),
 		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
 
 	if (before(seq, sender->td_maxend + 1) &&
-	    after(end, sender->td_end - receiver->td_maxwin - 1) &&
+	    in_recv_win &&
 	    before(sack, receiver->td_end + 1) &&
 	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
 		/*
@@ -725,7 +729,7 @@
 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 			"nf_ct_tcp: %s ",
 			before(seq, sender->td_maxend + 1) ?
-			after(end, sender->td_end - receiver->td_maxwin - 1) ?
+			in_recv_win ?
 			before(sack, receiver->td_end + 1) ?
 			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
 			: "ACK is under the lower bound (possible overly delayed ACK)"
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 962e979..d92cc31 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -419,6 +419,7 @@
 	nfmsg->version = NFNETLINK_V0;
 	nfmsg->res_id = htons(inst->group_num);
 
+	memset(&pmsg, 0, sizeof(pmsg));
 	pmsg.hw_protocol	= skb->protocol;
 	pmsg.hook		= hooknum;
 
@@ -498,7 +499,10 @@
 	if (indev && skb->dev &&
 	    skb->mac_header != skb->network_header) {
 		struct nfulnl_msg_packet_hw phw;
-		int len = dev_parse_header(skb, phw.hw_addr);
+		int len;
+
+		memset(&phw, 0, sizeof(phw));
+		len = dev_parse_header(skb, phw.hw_addr);
 		if (len > 0) {
 			phw.hw_addrlen = htons(len);
 			if (nla_put(inst->skb, NFULA_HWADDR, sizeof(phw), &phw))
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 971ea14..8a703c3 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -463,7 +463,10 @@
 	if (indev && entskb->dev &&
 	    entskb->mac_header != entskb->network_header) {
 		struct nfqnl_msg_packet_hw phw;
-		int len = dev_parse_header(entskb, phw.hw_addr);
+		int len;
+
+		memset(&phw, 0, sizeof(phw));
+		len = dev_parse_header(entskb, phw.hw_addr);
 		if (len) {
 			phw.hw_addrlen = htons(len);
 			if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw))
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 7011c71..6113cc7 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -52,7 +52,8 @@
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	struct tcphdr *tcph;
-	unsigned int tcplen, i;
+	int len, tcp_hdrlen;
+	unsigned int i;
 	__be16 oldval;
 	u16 newmss;
 	u8 *opt;
@@ -64,11 +65,14 @@
 	if (!skb_make_writable(skb, skb->len))
 		return -1;
 
-	tcplen = skb->len - tcphoff;
-	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	len = skb->len - tcphoff;
+	if (len < (int)sizeof(struct tcphdr))
+		return -1;
 
-	/* Header cannot be larger than the packet */
-	if (tcplen < tcph->doff*4)
+	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
+	tcp_hdrlen = tcph->doff * 4;
+
+	if (len < tcp_hdrlen)
 		return -1;
 
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
@@ -87,9 +91,8 @@
 		newmss = info->mss;
 
 	opt = (u_int8_t *)tcph;
-	for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
-		if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
-		    opt[i+1] == TCPOLEN_MSS) {
+	for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) {
+		if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) {
 			u_int16_t oldmss;
 
 			oldmss = (opt[i+2] << 8) | opt[i+3];
@@ -112,9 +115,10 @@
 	}
 
 	/* There is data after the header so the option can't be added
-	   without moving it, and doing so may make the SYN packet
-	   itself too large. Accept the packet unmodified instead. */
-	if (tcplen > tcph->doff*4)
+	 * without moving it, and doing so may make the SYN packet
+	 * itself too large. Accept the packet unmodified instead.
+	 */
+	if (len > tcp_hdrlen)
 		return 0;
 
 	/*
@@ -143,10 +147,10 @@
 		newmss = min(newmss, (u16)1220);
 
 	opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
-	memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
+	memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr));
 
 	inet_proto_csum_replace2(&tcph->check, skb,
-				 htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
+				 htons(len), htons(len + TCPOLEN_MSS), 1);
 	opt[0] = TCPOPT_MSS;
 	opt[1] = TCPOLEN_MSS;
 	opt[2] = (newmss & 0xff00) >> 8;
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index b68fa19..625fa1d 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -38,7 +38,7 @@
 	struct tcphdr *tcph;
 	u_int16_t n, o;
 	u_int8_t *opt;
-	int len;
+	int len, tcp_hdrlen;
 
 	/* This is a fragment, no TCP header is available */
 	if (par->fragoff != 0)
@@ -52,7 +52,9 @@
 		return NF_DROP;
 
 	tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
-	if (tcph->doff * 4 > len)
+	tcp_hdrlen = tcph->doff * 4;
+
+	if (len < tcp_hdrlen)
 		return NF_DROP;
 
 	opt  = (u_int8_t *)tcph;
@@ -61,10 +63,10 @@
 	 * Walk through all TCP options - if we find some option to remove,
 	 * set all octets to %TCPOPT_NOP and adjust checksum.
 	 */
-	for (i = sizeof(struct tcphdr); i < tcp_hdrlen(skb); i += optl) {
+	for (i = sizeof(struct tcphdr); i < tcp_hdrlen - 1; i += optl) {
 		optl = optlen(opt, i);
 
-		if (i + optl > tcp_hdrlen(skb))
+		if (i + optl > tcp_hdrlen)
 			break;
 
 		if (!tcpoptstrip_test_bit(info->strip_bmap, opt[i]))
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index f8b7191..20b1591 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -172,7 +172,7 @@
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
-		if (info && info->flags & XT_SOCKET_TRANSPARENT)
+		if (info->flags & XT_SOCKET_TRANSPARENT)
 			transparent = ((sk->sk_state != TCP_TIME_WAIT &&
 					inet_sk(sk)->transparent) ||
 				       (sk->sk_state == TCP_TIME_WAIT &&
@@ -196,7 +196,11 @@
 static bool
 socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	return socket_match(skb, par, NULL);
+	static struct xt_socket_mtinfo1 xt_info_v0 = {
+		.flags = 0,
+	};
+
+	return socket_match(skb, par, &xt_info_v0);
 }
 
 static bool
@@ -314,7 +318,7 @@
 
 		/* Ignore non-transparent sockets,
 		   if XT_SOCKET_TRANSPARENT is used */
-		if (info && info->flags & XT_SOCKET_TRANSPARENT)
+		if (info->flags & XT_SOCKET_TRANSPARENT)
 			transparent = ((sk->sk_state != TCP_TIME_WAIT &&
 					inet_sk(sk)->transparent) ||
 				       (sk->sk_state == TCP_TIME_WAIT &&
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index c15042f..a110064 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -691,8 +691,8 @@
 {
 	struct netlbl_domhsh_walk_arg *cb_arg = arg;
 
-	if (entry->type == NETLBL_NLTYPE_CIPSOV4 &&
-	    entry->type_def.cipsov4->doi == cb_arg->doi)
+	if (entry->def.type == NETLBL_NLTYPE_CIPSOV4 &&
+	    entry->def.cipso->doi == cb_arg->doi)
 		return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info);
 
 	return 0;
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 6bb1d42..85d842e 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -84,15 +84,15 @@
 #endif /* IPv6 */
 
 	ptr = container_of(entry, struct netlbl_dom_map, rcu);
-	if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) {
+	if (ptr->def.type == NETLBL_NLTYPE_ADDRSELECT) {
 		netlbl_af4list_foreach_safe(iter4, tmp4,
-					    &ptr->type_def.addrsel->list4) {
+					    &ptr->def.addrsel->list4) {
 			netlbl_af4list_remove_entry(iter4);
 			kfree(netlbl_domhsh_addr4_entry(iter4));
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_safe(iter6, tmp6,
-					    &ptr->type_def.addrsel->list6) {
+					    &ptr->def.addrsel->list6) {
 			netlbl_af6list_remove_entry(iter6);
 			kfree(netlbl_domhsh_addr6_entry(iter6));
 		}
@@ -213,21 +213,21 @@
 		if (addr4 != NULL) {
 			struct netlbl_domaddr4_map *map4;
 			map4 = netlbl_domhsh_addr4_entry(addr4);
-			type = map4->type;
-			cipsov4 = map4->type_def.cipsov4;
+			type = map4->def.type;
+			cipsov4 = map4->def.cipso;
 			netlbl_af4list_audit_addr(audit_buf, 0, NULL,
 						  addr4->addr, addr4->mask);
 #if IS_ENABLED(CONFIG_IPV6)
 		} else if (addr6 != NULL) {
 			struct netlbl_domaddr6_map *map6;
 			map6 = netlbl_domhsh_addr6_entry(addr6);
-			type = map6->type;
+			type = map6->def.type;
 			netlbl_af6list_audit_addr(audit_buf, 0, NULL,
 						  &addr6->addr, &addr6->mask);
 #endif /* IPv6 */
 		} else {
-			type = entry->type;
-			cipsov4 = entry->type_def.cipsov4;
+			type = entry->def.type;
+			cipsov4 = entry->def.cipso;
 		}
 		switch (type) {
 		case NETLBL_NLTYPE_UNLABELED:
@@ -265,26 +265,25 @@
 	if (entry == NULL)
 		return -EINVAL;
 
-	switch (entry->type) {
+	switch (entry->def.type) {
 	case NETLBL_NLTYPE_UNLABELED:
-		if (entry->type_def.cipsov4 != NULL ||
-		    entry->type_def.addrsel != NULL)
+		if (entry->def.cipso != NULL || entry->def.addrsel != NULL)
 			return -EINVAL;
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
-		if (entry->type_def.cipsov4 == NULL)
+		if (entry->def.cipso == NULL)
 			return -EINVAL;
 		break;
 	case NETLBL_NLTYPE_ADDRSELECT:
-		netlbl_af4list_foreach(iter4, &entry->type_def.addrsel->list4) {
+		netlbl_af4list_foreach(iter4, &entry->def.addrsel->list4) {
 			map4 = netlbl_domhsh_addr4_entry(iter4);
-			switch (map4->type) {
+			switch (map4->def.type) {
 			case NETLBL_NLTYPE_UNLABELED:
-				if (map4->type_def.cipsov4 != NULL)
+				if (map4->def.cipso != NULL)
 					return -EINVAL;
 				break;
 			case NETLBL_NLTYPE_CIPSOV4:
-				if (map4->type_def.cipsov4 == NULL)
+				if (map4->def.cipso == NULL)
 					return -EINVAL;
 				break;
 			default:
@@ -292,9 +291,9 @@
 			}
 		}
 #if IS_ENABLED(CONFIG_IPV6)
-		netlbl_af6list_foreach(iter6, &entry->type_def.addrsel->list6) {
+		netlbl_af6list_foreach(iter6, &entry->def.addrsel->list6) {
 			map6 = netlbl_domhsh_addr6_entry(iter6);
-			switch (map6->type) {
+			switch (map6->def.type) {
 			case NETLBL_NLTYPE_UNLABELED:
 				break;
 			default:
@@ -402,32 +401,31 @@
 			rcu_assign_pointer(netlbl_domhsh_def, entry);
 		}
 
-		if (entry->type == NETLBL_NLTYPE_ADDRSELECT) {
+		if (entry->def.type == NETLBL_NLTYPE_ADDRSELECT) {
 			netlbl_af4list_foreach_rcu(iter4,
-					       &entry->type_def.addrsel->list4)
+						   &entry->def.addrsel->list4)
 				netlbl_domhsh_audit_add(entry, iter4, NULL,
 							ret_val, audit_info);
 #if IS_ENABLED(CONFIG_IPV6)
 			netlbl_af6list_foreach_rcu(iter6,
-					       &entry->type_def.addrsel->list6)
+						   &entry->def.addrsel->list6)
 				netlbl_domhsh_audit_add(entry, NULL, iter6,
 							ret_val, audit_info);
 #endif /* IPv6 */
 		} else
 			netlbl_domhsh_audit_add(entry, NULL, NULL,
 						ret_val, audit_info);
-	} else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT &&
-		   entry->type == NETLBL_NLTYPE_ADDRSELECT) {
+	} else if (entry_old->def.type == NETLBL_NLTYPE_ADDRSELECT &&
+		   entry->def.type == NETLBL_NLTYPE_ADDRSELECT) {
 		struct list_head *old_list4;
 		struct list_head *old_list6;
 
-		old_list4 = &entry_old->type_def.addrsel->list4;
-		old_list6 = &entry_old->type_def.addrsel->list6;
+		old_list4 = &entry_old->def.addrsel->list4;
+		old_list6 = &entry_old->def.addrsel->list6;
 
 		/* we only allow the addition of address selectors if all of
 		 * the selectors do not exist in the existing domain map */
-		netlbl_af4list_foreach_rcu(iter4,
-					   &entry->type_def.addrsel->list4)
+		netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4)
 			if (netlbl_af4list_search_exact(iter4->addr,
 							iter4->mask,
 							old_list4)) {
@@ -435,8 +433,7 @@
 				goto add_return;
 			}
 #if IS_ENABLED(CONFIG_IPV6)
-		netlbl_af6list_foreach_rcu(iter6,
-					   &entry->type_def.addrsel->list6)
+		netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6)
 			if (netlbl_af6list_search_exact(&iter6->addr,
 							&iter6->mask,
 							old_list6)) {
@@ -446,7 +443,7 @@
 #endif /* IPv6 */
 
 		netlbl_af4list_foreach_safe(iter4, tmp4,
-					    &entry->type_def.addrsel->list4) {
+					    &entry->def.addrsel->list4) {
 			netlbl_af4list_remove_entry(iter4);
 			iter4->valid = 1;
 			ret_val = netlbl_af4list_add(iter4, old_list4);
@@ -457,7 +454,7 @@
 		}
 #if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_safe(iter6, tmp6,
-					    &entry->type_def.addrsel->list6) {
+					    &entry->def.addrsel->list6) {
 			netlbl_af6list_remove_entry(iter6);
 			iter6->valid = 1;
 			ret_val = netlbl_af6list_add(iter6, old_list6);
@@ -538,18 +535,18 @@
 		struct netlbl_af4list *iter4;
 		struct netlbl_domaddr4_map *map4;
 
-		switch (entry->type) {
+		switch (entry->def.type) {
 		case NETLBL_NLTYPE_ADDRSELECT:
 			netlbl_af4list_foreach_rcu(iter4,
-					     &entry->type_def.addrsel->list4) {
+					     &entry->def.addrsel->list4) {
 				map4 = netlbl_domhsh_addr4_entry(iter4);
-				cipso_v4_doi_putdef(map4->type_def.cipsov4);
+				cipso_v4_doi_putdef(map4->def.cipso);
 			}
 			/* no need to check the IPv6 list since we currently
 			 * support only unlabeled protocols for IPv6 */
 			break;
 		case NETLBL_NLTYPE_CIPSOV4:
-			cipso_v4_doi_putdef(entry->type_def.cipsov4);
+			cipso_v4_doi_putdef(entry->def.cipso);
 			break;
 		}
 		call_rcu(&entry->rcu, netlbl_domhsh_free_entry);
@@ -590,20 +587,21 @@
 		entry_map = netlbl_domhsh_search(domain);
 	else
 		entry_map = netlbl_domhsh_search_def(domain);
-	if (entry_map == NULL || entry_map->type != NETLBL_NLTYPE_ADDRSELECT)
+	if (entry_map == NULL ||
+	    entry_map->def.type != NETLBL_NLTYPE_ADDRSELECT)
 		goto remove_af4_failure;
 
 	spin_lock(&netlbl_domhsh_lock);
 	entry_addr = netlbl_af4list_remove(addr->s_addr, mask->s_addr,
-					   &entry_map->type_def.addrsel->list4);
+					   &entry_map->def.addrsel->list4);
 	spin_unlock(&netlbl_domhsh_lock);
 
 	if (entry_addr == NULL)
 		goto remove_af4_failure;
-	netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4)
+	netlbl_af4list_foreach_rcu(iter4, &entry_map->def.addrsel->list4)
 		goto remove_af4_single_addr;
 #if IS_ENABLED(CONFIG_IPV6)
-	netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6)
+	netlbl_af6list_foreach_rcu(iter6, &entry_map->def.addrsel->list6)
 		goto remove_af4_single_addr;
 #endif /* IPv6 */
 	/* the domain mapping is empty so remove it from the mapping table */
@@ -616,7 +614,7 @@
 	 * shouldn't be a problem */
 	synchronize_rcu();
 	entry = netlbl_domhsh_addr4_entry(entry_addr);
-	cipso_v4_doi_putdef(entry->type_def.cipsov4);
+	cipso_v4_doi_putdef(entry->def.cipso);
 	kfree(entry);
 	return 0;
 
@@ -693,8 +691,8 @@
  * responsible for ensuring that rcu_read_[un]lock() is called.
  *
  */
-struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
-						       __be32 addr)
+struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain,
+						     __be32 addr)
 {
 	struct netlbl_dom_map *dom_iter;
 	struct netlbl_af4list *addr_iter;
@@ -702,15 +700,13 @@
 	dom_iter = netlbl_domhsh_search_def(domain);
 	if (dom_iter == NULL)
 		return NULL;
-	if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
-		return NULL;
 
-	addr_iter = netlbl_af4list_search(addr,
-					  &dom_iter->type_def.addrsel->list4);
+	if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT)
+		return &dom_iter->def;
+	addr_iter = netlbl_af4list_search(addr, &dom_iter->def.addrsel->list4);
 	if (addr_iter == NULL)
 		return NULL;
-
-	return netlbl_domhsh_addr4_entry(addr_iter);
+	return &(netlbl_domhsh_addr4_entry(addr_iter)->def);
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
@@ -725,7 +721,7 @@
  * responsible for ensuring that rcu_read_[un]lock() is called.
  *
  */
-struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
+struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain,
 						   const struct in6_addr *addr)
 {
 	struct netlbl_dom_map *dom_iter;
@@ -734,15 +730,13 @@
 	dom_iter = netlbl_domhsh_search_def(domain);
 	if (dom_iter == NULL)
 		return NULL;
-	if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT)
-		return NULL;
 
-	addr_iter = netlbl_af6list_search(addr,
-					  &dom_iter->type_def.addrsel->list6);
+	if (dom_iter->def.type != NETLBL_NLTYPE_ADDRSELECT)
+		return &dom_iter->def;
+	addr_iter = netlbl_af6list_search(addr, &dom_iter->def.addrsel->list6);
 	if (addr_iter == NULL)
 		return NULL;
-
-	return netlbl_domhsh_addr6_entry(addr_iter);
+	return &(netlbl_domhsh_addr6_entry(addr_iter)->def);
 }
 #endif /* IPv6 */
 
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 90872c4..b9be0ee 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -43,37 +43,35 @@
 #define NETLBL_DOMHSH_BITSIZE       7
 
 /* Domain mapping definition structures */
+struct netlbl_domaddr_map {
+	struct list_head list4;
+	struct list_head list6;
+};
+struct netlbl_dommap_def {
+	u32 type;
+	union {
+		struct netlbl_domaddr_map *addrsel;
+		struct cipso_v4_doi *cipso;
+	};
+};
 #define netlbl_domhsh_addr4_entry(iter) \
 	container_of(iter, struct netlbl_domaddr4_map, list)
 struct netlbl_domaddr4_map {
-	u32 type;
-	union {
-		struct cipso_v4_doi *cipsov4;
-	} type_def;
+	struct netlbl_dommap_def def;
 
 	struct netlbl_af4list list;
 };
 #define netlbl_domhsh_addr6_entry(iter) \
 	container_of(iter, struct netlbl_domaddr6_map, list)
 struct netlbl_domaddr6_map {
-	u32 type;
-
-	/* NOTE: no 'type_def' union needed at present since we don't currently
-	 *       support any IPv6 labeling protocols */
+	struct netlbl_dommap_def def;
 
 	struct netlbl_af6list list;
 };
-struct netlbl_domaddr_map {
-	struct list_head list4;
-	struct list_head list6;
-};
+
 struct netlbl_dom_map {
 	char *domain;
-	u32 type;
-	union {
-		struct cipso_v4_doi *cipsov4;
-		struct netlbl_domaddr_map *addrsel;
-	} type_def;
+	struct netlbl_dommap_def def;
 
 	u32 valid;
 	struct list_head list;
@@ -97,16 +95,16 @@
 int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info);
 int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info);
 struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain);
-struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
-						       __be32 addr);
+struct netlbl_dommap_def *netlbl_domhsh_getentry_af4(const char *domain,
+						     __be32 addr);
+#if IS_ENABLED(CONFIG_IPV6)
+struct netlbl_dommap_def *netlbl_domhsh_getentry_af6(const char *domain,
+						   const struct in6_addr *addr);
+#endif /* IPv6 */
+
 int netlbl_domhsh_walk(u32 *skip_bkt,
 		     u32 *skip_chain,
 		     int (*callback) (struct netlbl_dom_map *entry, void *arg),
 		     void *cb_arg);
 
-#if IS_ENABLED(CONFIG_IPV6)
-struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
-						  const struct in6_addr *addr);
-#endif /* IPv6 */
-
 #endif
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 7c94aed..96a458e 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -122,7 +122,7 @@
 	}
 
 	if (addr == NULL && mask == NULL)
-		entry->type = NETLBL_NLTYPE_UNLABELED;
+		entry->def.type = NETLBL_NLTYPE_UNLABELED;
 	else if (addr != NULL && mask != NULL) {
 		addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC);
 		if (addrmap == NULL)
@@ -137,7 +137,7 @@
 			map4 = kzalloc(sizeof(*map4), GFP_ATOMIC);
 			if (map4 == NULL)
 				goto cfg_unlbl_map_add_failure;
-			map4->type = NETLBL_NLTYPE_UNLABELED;
+			map4->def.type = NETLBL_NLTYPE_UNLABELED;
 			map4->list.addr = addr4->s_addr & mask4->s_addr;
 			map4->list.mask = mask4->s_addr;
 			map4->list.valid = 1;
@@ -154,7 +154,7 @@
 			map6 = kzalloc(sizeof(*map6), GFP_ATOMIC);
 			if (map6 == NULL)
 				goto cfg_unlbl_map_add_failure;
-			map6->type = NETLBL_NLTYPE_UNLABELED;
+			map6->def.type = NETLBL_NLTYPE_UNLABELED;
 			map6->list.addr = *addr6;
 			map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0];
 			map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1];
@@ -174,8 +174,8 @@
 			break;
 		}
 
-		entry->type_def.addrsel = addrmap;
-		entry->type = NETLBL_NLTYPE_ADDRSELECT;
+		entry->def.addrsel = addrmap;
+		entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
 	} else {
 		ret_val = -EINVAL;
 		goto cfg_unlbl_map_add_failure;
@@ -355,8 +355,8 @@
 	}
 
 	if (addr == NULL && mask == NULL) {
-		entry->type_def.cipsov4 = doi_def;
-		entry->type = NETLBL_NLTYPE_CIPSOV4;
+		entry->def.cipso = doi_def;
+		entry->def.type = NETLBL_NLTYPE_CIPSOV4;
 	} else if (addr != NULL && mask != NULL) {
 		addrmap = kzalloc(sizeof(*addrmap), GFP_ATOMIC);
 		if (addrmap == NULL)
@@ -367,8 +367,8 @@
 		addrinfo = kzalloc(sizeof(*addrinfo), GFP_ATOMIC);
 		if (addrinfo == NULL)
 			goto out_addrinfo;
-		addrinfo->type_def.cipsov4 = doi_def;
-		addrinfo->type = NETLBL_NLTYPE_CIPSOV4;
+		addrinfo->def.cipso = doi_def;
+		addrinfo->def.type = NETLBL_NLTYPE_CIPSOV4;
 		addrinfo->list.addr = addr->s_addr & mask->s_addr;
 		addrinfo->list.mask = mask->s_addr;
 		addrinfo->list.valid = 1;
@@ -376,8 +376,8 @@
 		if (ret_val != 0)
 			goto cfg_cipsov4_map_add_failure;
 
-		entry->type_def.addrsel = addrmap;
-		entry->type = NETLBL_NLTYPE_ADDRSELECT;
+		entry->def.addrsel = addrmap;
+		entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
 	} else {
 		ret_val = -EINVAL;
 		goto out_addrmap;
@@ -657,14 +657,14 @@
 	}
 	switch (family) {
 	case AF_INET:
-		switch (dom_entry->type) {
+		switch (dom_entry->def.type) {
 		case NETLBL_NLTYPE_ADDRSELECT:
 			ret_val = -EDESTADDRREQ;
 			break;
 		case NETLBL_NLTYPE_CIPSOV4:
 			ret_val = cipso_v4_sock_setattr(sk,
-						    dom_entry->type_def.cipsov4,
-						    secattr);
+							dom_entry->def.cipso,
+							secattr);
 			break;
 		case NETLBL_NLTYPE_UNLABELED:
 			ret_val = 0;
@@ -754,23 +754,22 @@
 {
 	int ret_val;
 	struct sockaddr_in *addr4;
-	struct netlbl_domaddr4_map *af4_entry;
+	struct netlbl_dommap_def *entry;
 
 	rcu_read_lock();
 	switch (addr->sa_family) {
 	case AF_INET:
 		addr4 = (struct sockaddr_in *)addr;
-		af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
-						       addr4->sin_addr.s_addr);
-		if (af4_entry == NULL) {
+		entry = netlbl_domhsh_getentry_af4(secattr->domain,
+						   addr4->sin_addr.s_addr);
+		if (entry == NULL) {
 			ret_val = -ENOENT;
 			goto conn_setattr_return;
 		}
-		switch (af4_entry->type) {
+		switch (entry->type) {
 		case NETLBL_NLTYPE_CIPSOV4:
 			ret_val = cipso_v4_sock_setattr(sk,
-						   af4_entry->type_def.cipsov4,
-						   secattr);
+							entry->cipso, secattr);
 			break;
 		case NETLBL_NLTYPE_UNLABELED:
 			/* just delete the protocols we support for right now
@@ -812,36 +811,21 @@
 		       const struct netlbl_lsm_secattr *secattr)
 {
 	int ret_val;
-	struct netlbl_dom_map *dom_entry;
-	struct netlbl_domaddr4_map *af4_entry;
-	u32 proto_type;
-	struct cipso_v4_doi *proto_cv4;
+	struct netlbl_dommap_def *entry;
 
 	rcu_read_lock();
-	dom_entry = netlbl_domhsh_getentry(secattr->domain);
-	if (dom_entry == NULL) {
-		ret_val = -ENOENT;
-		goto req_setattr_return;
-	}
 	switch (req->rsk_ops->family) {
 	case AF_INET:
-		if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) {
-			struct inet_request_sock *req_inet = inet_rsk(req);
-			af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
-							    req_inet->rmt_addr);
-			if (af4_entry == NULL) {
-				ret_val = -ENOENT;
-				goto req_setattr_return;
-			}
-			proto_type = af4_entry->type;
-			proto_cv4 = af4_entry->type_def.cipsov4;
-		} else {
-			proto_type = dom_entry->type;
-			proto_cv4 = dom_entry->type_def.cipsov4;
+		entry = netlbl_domhsh_getentry_af4(secattr->domain,
+						   inet_rsk(req)->rmt_addr);
+		if (entry == NULL) {
+			ret_val = -ENOENT;
+			goto req_setattr_return;
 		}
-		switch (proto_type) {
+		switch (entry->type) {
 		case NETLBL_NLTYPE_CIPSOV4:
-			ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr);
+			ret_val = cipso_v4_req_setattr(req,
+						       entry->cipso, secattr);
 			break;
 		case NETLBL_NLTYPE_UNLABELED:
 			/* just delete the protocols we support for right now
@@ -899,23 +883,21 @@
 {
 	int ret_val;
 	struct iphdr *hdr4;
-	struct netlbl_domaddr4_map *af4_entry;
+	struct netlbl_dommap_def *entry;
 
 	rcu_read_lock();
 	switch (family) {
 	case AF_INET:
 		hdr4 = ip_hdr(skb);
-		af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
-						       hdr4->daddr);
-		if (af4_entry == NULL) {
+		entry = netlbl_domhsh_getentry_af4(secattr->domain,hdr4->daddr);
+		if (entry == NULL) {
 			ret_val = -ENOENT;
 			goto skbuff_setattr_return;
 		}
-		switch (af4_entry->type) {
+		switch (entry->type) {
 		case NETLBL_NLTYPE_CIPSOV4:
-			ret_val = cipso_v4_skbuff_setattr(skb,
-						   af4_entry->type_def.cipsov4,
-						   secattr);
+			ret_val = cipso_v4_skbuff_setattr(skb, entry->cipso,
+							  secattr);
 			break;
 		case NETLBL_NLTYPE_UNLABELED:
 			/* just delete the protocols we support for right now
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index c5384ff..dd1c37d 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -104,7 +104,7 @@
 		ret_val = -ENOMEM;
 		goto add_failure;
 	}
-	entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
+	entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]);
 	if (info->attrs[NLBL_MGMT_A_DOMAIN]) {
 		size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]);
 		entry->domain = kmalloc(tmp_size, GFP_KERNEL);
@@ -116,12 +116,12 @@
 			    info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size);
 	}
 
-	/* NOTE: internally we allow/use a entry->type value of
+	/* NOTE: internally we allow/use a entry->def.type value of
 	 *       NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users
 	 *       to pass that as a protocol value because we need to know the
 	 *       "real" protocol */
 
-	switch (entry->type) {
+	switch (entry->def.type) {
 	case NETLBL_NLTYPE_UNLABELED:
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
@@ -132,7 +132,7 @@
 		cipsov4 = cipso_v4_doi_getdef(tmp_val);
 		if (cipsov4 == NULL)
 			goto add_failure;
-		entry->type_def.cipsov4 = cipsov4;
+		entry->def.cipso = cipsov4;
 		break;
 	default:
 		goto add_failure;
@@ -172,9 +172,9 @@
 		map->list.addr = addr->s_addr & mask->s_addr;
 		map->list.mask = mask->s_addr;
 		map->list.valid = 1;
-		map->type = entry->type;
+		map->def.type = entry->def.type;
 		if (cipsov4)
-			map->type_def.cipsov4 = cipsov4;
+			map->def.cipso = cipsov4;
 
 		ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
 		if (ret_val != 0) {
@@ -182,8 +182,8 @@
 			goto add_failure;
 		}
 
-		entry->type = NETLBL_NLTYPE_ADDRSELECT;
-		entry->type_def.addrsel = addrmap;
+		entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
+		entry->def.addrsel = addrmap;
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
 		struct in6_addr *addr;
@@ -223,7 +223,7 @@
 		map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
 		map->list.mask = *mask;
 		map->list.valid = 1;
-		map->type = entry->type;
+		map->def.type = entry->def.type;
 
 		ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
 		if (ret_val != 0) {
@@ -231,8 +231,8 @@
 			goto add_failure;
 		}
 
-		entry->type = NETLBL_NLTYPE_ADDRSELECT;
-		entry->type_def.addrsel = addrmap;
+		entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
+		entry->def.addrsel = addrmap;
 #endif /* IPv6 */
 	}
 
@@ -281,14 +281,13 @@
 			return ret_val;
 	}
 
-	switch (entry->type) {
+	switch (entry->def.type) {
 	case NETLBL_NLTYPE_ADDRSELECT:
 		nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST);
 		if (nla_a == NULL)
 			return -ENOMEM;
 
-		netlbl_af4list_foreach_rcu(iter4,
-					   &entry->type_def.addrsel->list4) {
+		netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) {
 			struct netlbl_domaddr4_map *map4;
 			struct in_addr addr_struct;
 
@@ -310,13 +309,13 @@
 				return ret_val;
 			map4 = netlbl_domhsh_addr4_entry(iter4);
 			ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
-					      map4->type);
+					      map4->def.type);
 			if (ret_val != 0)
 				return ret_val;
-			switch (map4->type) {
+			switch (map4->def.type) {
 			case NETLBL_NLTYPE_CIPSOV4:
 				ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
-						  map4->type_def.cipsov4->doi);
+						      map4->def.cipso->doi);
 				if (ret_val != 0)
 					return ret_val;
 				break;
@@ -325,8 +324,7 @@
 			nla_nest_end(skb, nla_b);
 		}
 #if IS_ENABLED(CONFIG_IPV6)
-		netlbl_af6list_foreach_rcu(iter6,
-					   &entry->type_def.addrsel->list6) {
+		netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) {
 			struct netlbl_domaddr6_map *map6;
 
 			nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR);
@@ -345,7 +343,7 @@
 				return ret_val;
 			map6 = netlbl_domhsh_addr6_entry(iter6);
 			ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL,
-					      map6->type);
+					      map6->def.type);
 			if (ret_val != 0)
 				return ret_val;
 
@@ -356,14 +354,14 @@
 		nla_nest_end(skb, nla_a);
 		break;
 	case NETLBL_NLTYPE_UNLABELED:
-		ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+		ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type);
 		break;
 	case NETLBL_NLTYPE_CIPSOV4:
-		ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type);
+		ret_val = nla_put_u32(skb,NLBL_MGMT_A_PROTOCOL,entry->def.type);
 		if (ret_val != 0)
 			return ret_val;
 		ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI,
-				      entry->type_def.cipsov4->doi);
+				      entry->def.cipso->doi);
 		break;
 	}
 
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index af35319..8f08974 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -1541,7 +1541,7 @@
 	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
 	if (entry == NULL)
 		return -ENOMEM;
-	entry->type = NETLBL_NLTYPE_UNLABELED;
+	entry->def.type = NETLBL_NLTYPE_UNLABELED;
 	ret_val = netlbl_domhsh_add_default(entry, &audit_info);
 	if (ret_val != 0)
 		return ret_val;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2fd6dbe..512718a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -571,7 +571,7 @@
 	    !capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+	if ((nlh->nlmsg_flags & NLM_F_DUMP) == NLM_F_DUMP) {
 		struct netlink_dump_control c = {
 			.dump = ops->dumpit,
 			.done = ops->done,
@@ -877,8 +877,10 @@
 #ifdef CONFIG_MODULES
 		if (res == NULL) {
 			genl_unlock();
+			up_read(&cb_lock);
 			request_module("net-pf-%d-proto-%d-family-%s",
 				       PF_NETLINK, NETLINK_GENERIC, name);
+			down_read(&cb_lock);
 			genl_lock();
 			res = genl_family_find_byname(name);
 		}
diff --git a/net/nfc/core.c b/net/nfc/core.c
index dc96a83..1d074dd 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -44,7 +44,7 @@
 /* NFC device ID bitmap */
 static DEFINE_IDA(nfc_index_ida);
 
-int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name)
+int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
 {
 	int rc = 0;
 
@@ -62,28 +62,28 @@
 		goto error;
 	}
 
-	if (!dev->ops->fw_upload) {
+	if (!dev->ops->fw_download) {
 		rc = -EOPNOTSUPP;
 		goto error;
 	}
 
-	dev->fw_upload_in_progress = true;
-	rc = dev->ops->fw_upload(dev, firmware_name);
+	dev->fw_download_in_progress = true;
+	rc = dev->ops->fw_download(dev, firmware_name);
 	if (rc)
-		dev->fw_upload_in_progress = false;
+		dev->fw_download_in_progress = false;
 
 error:
 	device_unlock(&dev->dev);
 	return rc;
 }
 
-int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name)
+int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name)
 {
-	dev->fw_upload_in_progress = false;
+	dev->fw_download_in_progress = false;
 
-	return nfc_genl_fw_upload_done(dev, firmware_name);
+	return nfc_genl_fw_download_done(dev, firmware_name);
 }
-EXPORT_SYMBOL(nfc_fw_upload_done);
+EXPORT_SYMBOL(nfc_fw_download_done);
 
 /**
  * nfc_dev_up - turn on the NFC device
@@ -110,7 +110,7 @@
 		goto error;
 	}
 
-	if (dev->fw_upload_in_progress) {
+	if (dev->fw_download_in_progress) {
 		rc = -EBUSY;
 		goto error;
 	}
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index 7b1c186..fe66908 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -809,14 +809,14 @@
 	}
 }
 
-static int hci_fw_upload(struct nfc_dev *nfc_dev, const char *firmware_name)
+static int hci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name)
 {
 	struct nfc_hci_dev *hdev = nfc_get_drvdata(nfc_dev);
 
-	if (!hdev->ops->fw_upload)
+	if (!hdev->ops->fw_download)
 		return -ENOTSUPP;
 
-	return hdev->ops->fw_upload(hdev, firmware_name);
+	return hdev->ops->fw_download(hdev, firmware_name);
 }
 
 static struct nfc_ops hci_nfc_ops = {
@@ -831,7 +831,7 @@
 	.im_transceive = hci_transceive,
 	.tm_send = hci_tm_send,
 	.check_presence = hci_check_presence,
-	.fw_upload = hci_fw_upload,
+	.fw_download = hci_fw_download,
 	.discover_se = hci_discover_se,
 	.enable_se = hci_enable_se,
 	.disable_se = hci_disable_se,
diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig
index 2a24160..a4f1e42 100644
--- a/net/nfc/nci/Kconfig
+++ b/net/nfc/nci/Kconfig
@@ -11,6 +11,7 @@
 
 config NFC_NCI_SPI
 	depends on NFC_NCI && SPI
+	select CRC_CCITT
 	bool "NCI over SPI protocol support"
 	default n
 	help
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index b05ad90..f16fd59 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1089,7 +1089,7 @@
 	return rc;
 }
 
-static int nfc_genl_fw_upload(struct sk_buff *skb, struct genl_info *info)
+static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nfc_dev *dev;
 	int rc;
@@ -1108,13 +1108,13 @@
 	nla_strlcpy(firmware_name, info->attrs[NFC_ATTR_FIRMWARE_NAME],
 		    sizeof(firmware_name));
 
-	rc = nfc_fw_upload(dev, firmware_name);
+	rc = nfc_fw_download(dev, firmware_name);
 
 	nfc_put_device(dev);
 	return rc;
 }
 
-int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name)
+int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name)
 {
 	struct sk_buff *msg;
 	void *hdr;
@@ -1124,7 +1124,7 @@
 		return -ENOMEM;
 
 	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
-			  NFC_CMD_FW_UPLOAD);
+			  NFC_CMD_FW_DOWNLOAD);
 	if (!hdr)
 		goto free_msg;
 
@@ -1251,8 +1251,8 @@
 		.policy = nfc_genl_policy,
 	},
 	{
-		.cmd = NFC_CMD_FW_UPLOAD,
-		.doit = nfc_genl_fw_upload,
+		.cmd = NFC_CMD_FW_DOWNLOAD,
+		.doit = nfc_genl_fw_download,
 		.policy = nfc_genl_policy,
 	},
 	{
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index ee85a1f..820a785 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -123,10 +123,10 @@
 	class_dev_iter_exit(iter);
 }
 
-int nfc_fw_upload(struct nfc_dev *dev, const char *firmware_name);
-int nfc_genl_fw_upload_done(struct nfc_dev *dev, const char *firmware_name);
+int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name);
+int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name);
 
-int nfc_fw_upload_done(struct nfc_dev *dev, const char *firmware_name);
+int nfc_fw_download_done(struct nfc_dev *dev, const char *firmware_name);
 
 int nfc_dev_up(struct nfc_dev *dev);
 
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 22c5f39..ab101f7 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -535,6 +535,7 @@
 {
 	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
 
+	OVS_CB(skb)->tun_key = NULL;
 	return do_execute_actions(dp, skb, acts->actions,
 					 acts->actions_len, false);
 }
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f7e3a0d..f2ed760 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2076,9 +2076,6 @@
 	ovs_notify(reply, info, &ovs_dp_vport_multicast_group);
 	return 0;
 
-	rtnl_unlock();
-	return 0;
-
 exit_free:
 	kfree_skb(reply);
 exit_unlock:
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 5c519b1..1aa84dc 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -240,7 +240,7 @@
 	struct flex_array *buckets;
 	int i, err;
 
-	buckets = flex_array_alloc(sizeof(struct hlist_head *),
+	buckets = flex_array_alloc(sizeof(struct hlist_head),
 				   n_buckets, GFP_KERNEL);
 	if (!buckets)
 		return NULL;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 4b66c75..75c8bbf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3259,9 +3259,11 @@
 
 		if (po->tp_version == TPACKET_V3) {
 			lv = sizeof(struct tpacket_stats_v3);
+			st.stats3.tp_packets += st.stats3.tp_drops;
 			data = &st.stats3;
 		} else {
 			lv = sizeof(struct tpacket_stats);
+			st.stats1.tp_packets += st.stats1.tp_drops;
 			data = &st.stats1;
 		}
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 281c1bd..51b968d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -285,6 +285,45 @@
 	return q;
 }
 
+/* The linklayer setting were not transferred from iproute2, in older
+ * versions, and the rate tables lookup systems have been dropped in
+ * the kernel. To keep backward compatible with older iproute2 tc
+ * utils, we detect the linklayer setting by detecting if the rate
+ * table were modified.
+ *
+ * For linklayer ATM table entries, the rate table will be aligned to
+ * 48 bytes, thus some table entries will contain the same value.  The
+ * mpu (min packet unit) is also encoded into the old rate table, thus
+ * starting from the mpu, we find low and high table entries for
+ * mapping this cell.  If these entries contain the same value, when
+ * the rate tables have been modified for linklayer ATM.
+ *
+ * This is done by rounding mpu to the nearest 48 bytes cell/entry,
+ * and then roundup to the next cell, calc the table entry one below,
+ * and compare.
+ */
+static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
+{
+	int low       = roundup(r->mpu, 48);
+	int high      = roundup(low+1, 48);
+	int cell_low  = low >> r->cell_log;
+	int cell_high = (high >> r->cell_log) - 1;
+
+	/* rtab is too inaccurate at rates > 100Mbit/s */
+	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
+		pr_debug("TC linklayer: Giving up ATM detection\n");
+		return TC_LINKLAYER_ETHERNET;
+	}
+
+	if ((cell_high > cell_low) && (cell_high < 256)
+	    && (rtab[cell_low] == rtab[cell_high])) {
+		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
+			 cell_low, cell_high, rtab[cell_high]);
+		return TC_LINKLAYER_ATM;
+	}
+	return TC_LINKLAYER_ETHERNET;
+}
+
 static struct qdisc_rate_table *qdisc_rtab_list;
 
 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
@@ -308,6 +347,8 @@
 		rtab->rate = *r;
 		rtab->refcnt = 1;
 		memcpy(rtab->data, nla_data(tab), 1024);
+		if (r->linklayer == TC_LINKLAYER_UNAWARE)
+			r->linklayer = __detect_linklayer(r, rtab->data);
 		rtab->next = qdisc_rtab_list;
 		qdisc_rtab_list = rtab;
 	}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ca8e0a5..1f9c314 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -605,6 +605,7 @@
 		struct sockaddr_atmpvc pvc;
 		int state;
 
+		memset(&pvc, 0, sizeof(pvc));
 		pvc.sap_family = AF_ATMPVC;
 		pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
 		pvc.sap_addr.vpi = flow->vcc->vpi;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 71a5688..7a42c81 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1465,6 +1465,7 @@
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_cbq_wrropt opt;
 
+	memset(&opt, 0, sizeof(opt));
 	opt.flags = 0;
 	opt.allot = cl->allot;
 	opt.priority = cl->priority + 1;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4626cef..48be3d5 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -25,6 +25,7 @@
 #include <linux/rcupdate.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <linux/if_vlan.h>
 #include <net/sch_generic.h>
 #include <net/pkt_sched.h>
 #include <net/dst.h>
@@ -207,15 +208,19 @@
 
 unsigned long dev_trans_start(struct net_device *dev)
 {
-	unsigned long val, res = dev->trans_start;
+	unsigned long val, res;
 	unsigned int i;
 
+	if (is_vlan_dev(dev))
+		dev = vlan_dev_real_dev(dev);
+	res = dev->trans_start;
 	for (i = 0; i < dev->num_tx_queues; i++) {
 		val = netdev_get_tx_queue(dev, i)->trans_start;
 		if (val && time_after(val, res))
 			res = val;
 	}
 	dev->trans_start = res;
+
 	return res;
 }
 EXPORT_SYMBOL(dev_trans_start);
@@ -904,6 +909,7 @@
 	memset(r, 0, sizeof(*r));
 	r->overhead = conf->overhead;
 	r->rate_bytes_ps = conf->rate;
+	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
 	r->mult = 1;
 	/*
 	 * The deal here is to replace a divide by a reciprocal one
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index c2124ea..c2178b1 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -100,7 +100,7 @@
 	struct psched_ratecfg	ceil;
 	s64			buffer, cbuffer;/* token bucket depth/rate */
 	s64			mbuffer;	/* max wait time */
-	int			prio;		/* these two are used only by leaves... */
+	u32			prio;		/* these two are used only by leaves... */
 	int			quantum;	/* but stored for parent-to-leaf return */
 
 	struct tcf_proto	*filter_list;	/* class attached filters */
@@ -1329,6 +1329,7 @@
 	struct htb_sched *q = qdisc_priv(sch);
 	struct htb_class *cl = (struct htb_class *)*arg, *parent;
 	struct nlattr *opt = tca[TCA_OPTIONS];
+	struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
 	struct nlattr *tb[TCA_HTB_MAX + 1];
 	struct tc_htb_opt *hopt;
 
@@ -1350,6 +1351,18 @@
 	if (!hopt->rate.rate || !hopt->ceil.rate)
 		goto failure;
 
+	/* Keeping backward compatible with rate_table based iproute2 tc */
+	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
+		rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
+		if (rtab)
+			qdisc_put_rtab(rtab);
+	}
+	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
+		ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
+		if (ctab)
+			qdisc_put_rtab(ctab);
+	}
+
 	if (!cl) {		/* new class */
 		struct Qdisc *new_q;
 		int prio;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index bce5b79..ab67efc 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -846,12 +846,12 @@
 		else
 			spc_state = SCTP_ADDR_AVAILABLE;
 		/* Don't inform ULP about transition from PF to
-		 * active state and set cwnd to 1, see SCTP
+		 * active state and set cwnd to 1 MTU, see SCTP
 		 * Quick failover draft section 5.1, point 5
 		 */
 		if (transport->state == SCTP_PF) {
 			ulp_notify = false;
-			transport->cwnd = 1;
+			transport->cwnd = asoc->pathmtu;
 		}
 		transport->state = SCTP_ACTIVE;
 		break;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index bdbbc3f..8fdd160 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -181,12 +181,12 @@
 		return;
 	}
 
-	call_rcu(&transport->rcu, sctp_transport_destroy_rcu);
-
 	sctp_packet_free(&transport->packet);
 
 	if (transport->asoc)
 		sctp_association_put(transport->asoc);
+
+	call_rcu(&transport->rcu, sctp_transport_destroy_rcu);
 }
 
 /* Start T3_rtx timer if it is not already running and update the heartbeat
diff --git a/net/socket.c b/net/socket.c
index 829b460..b2d7c62 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,7 +106,7 @@
 #include <linux/atalk.h>
 #include <net/busy_poll.h>
 
-#ifdef CONFIG_NET_LL_RX_POLL
+#ifdef CONFIG_NET_RX_BUSY_POLL
 unsigned int sysctl_net_busy_read __read_mostly;
 unsigned int sysctl_net_busy_poll __read_mostly;
 #endif
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index d304f41..af7ffd4 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -120,7 +120,7 @@
 	if (IS_ERR(clnt)) {
 		dprintk("RPC:       failed to create AF_LOCAL gssproxy "
 				"client (errno %ld).\n", PTR_ERR(clnt));
-		result = -PTR_ERR(clnt);
+		result = PTR_ERR(clnt);
 		*_clnt = NULL;
 		goto out;
 	}
@@ -328,7 +328,6 @@
 	kfree(data->in_handle.data);
 	kfree(data->out_handle.data);
 	kfree(data->out_token.data);
-	kfree(data->mech_oid.data);
 	free_svc_cred(&data->creds);
 }
 
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 357f613..3c85d1c 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -430,7 +430,7 @@
 static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
 				    struct gssx_name_attr_array *naa)
 {
-	struct gssx_name_attr dummy;
+	struct gssx_name_attr dummy = { .attr = {.len = 0} };
 	u32 count, i;
 	__be32 *p;
 
@@ -493,12 +493,13 @@
 	return err;
 }
 
+
 static int gssx_dec_name(struct xdr_stream *xdr,
 			 struct gssx_name *name)
 {
-	struct xdr_netobj dummy_netobj;
-	struct gssx_name_attr_array dummy_name_attr_array;
-	struct gssx_option_array dummy_option_array;
+	struct xdr_netobj dummy_netobj = { .len = 0 };
+	struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 };
+	struct gssx_option_array dummy_option_array = { .count = 0 };
 	int err;
 
 	/* name->display_name */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index d0347d1..09fb638 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1180,6 +1180,7 @@
 		gm = gss_mech_get_by_OID(&ud->mech_oid);
 		if (!gm)
 			goto out;
+		rsci.cred.cr_gss_mech = gm;
 
 		status = -EINVAL;
 		/* mech-specific data: */
@@ -1195,7 +1196,6 @@
 	rscp = rsc_update(cd, &rsci, rscp);
 	status = 0;
 out:
-	gss_mech_put(gm);
 	rsc_free(&rsci);
 	if (rscp)
 		cache_put(&rscp->h, cd);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 74f6a70..ecbc4e3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1660,6 +1660,10 @@
 		task->tk_action = call_connect_status;
 		if (task->tk_status < 0)
 			return;
+		if (task->tk_flags & RPC_TASK_NOCONNECT) {
+			rpc_exit(task, -ENOTCONN);
+			return;
+		}
 		xprt_connect(task);
 	}
 }
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 74d948f..779742c 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,7 @@
 	struct rpc_clnt *rpcb_local_clnt4;
 	spinlock_t rpcb_clnt_lock;
 	unsigned int rpcb_users;
+	unsigned int rpcb_is_af_local : 1;
 
 	struct mutex gssp_lock;
 	wait_queue_head_t gssp_wq;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 3df764d..1891a10 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -204,13 +204,15 @@
 }
 
 static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
-			struct rpc_clnt *clnt4)
+			struct rpc_clnt *clnt4,
+			bool is_af_local)
 {
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 
 	/* Protected by rpcb_create_local_mutex */
 	sn->rpcb_local_clnt = clnt;
 	sn->rpcb_local_clnt4 = clnt4;
+	sn->rpcb_is_af_local = is_af_local ? 1 : 0;
 	smp_wmb(); 
 	sn->rpcb_users = 1;
 	dprintk("RPC:       created new rpcb local clients (rpcb_local_clnt: "
@@ -238,6 +240,14 @@
 		.program	= &rpcb_program,
 		.version	= RPCBVERS_2,
 		.authflavor	= RPC_AUTH_NULL,
+		/*
+		 * We turn off the idle timeout to prevent the kernel
+		 * from automatically disconnecting the socket.
+		 * Otherwise, we'd have to cache the mount namespace
+		 * of the caller and somehow pass that to the socket
+		 * reconnect code.
+		 */
+		.flags		= RPC_CLNT_CREATE_NO_IDLE_TIMEOUT,
 	};
 	struct rpc_clnt *clnt, *clnt4;
 	int result = 0;
@@ -263,7 +273,7 @@
 		clnt4 = NULL;
 	}
 
-	rpcb_set_local(net, clnt, clnt4);
+	rpcb_set_local(net, clnt, clnt4, true);
 
 out:
 	return result;
@@ -315,7 +325,7 @@
 		clnt4 = NULL;
 	}
 
-	rpcb_set_local(net, clnt, clnt4);
+	rpcb_set_local(net, clnt, clnt4, false);
 
 out:
 	return result;
@@ -376,13 +386,16 @@
 	return rpc_create(&args);
 }
 
-static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
+static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set)
 {
-	int result, error = 0;
+	int flags = RPC_TASK_NOCONNECT;
+	int error, result = 0;
 
+	if (is_set || !sn->rpcb_is_af_local)
+		flags = RPC_TASK_SOFTCONN;
 	msg->rpc_resp = &result;
 
-	error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN);
+	error = rpc_call_sync(clnt, msg, flags);
 	if (error < 0) {
 		dprintk("RPC:       failed to contact local rpcbind "
 				"server (errno %d).\n", -error);
@@ -439,16 +452,19 @@
 		.rpc_argp	= &map,
 	};
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+	bool is_set = false;
 
 	dprintk("RPC:       %sregistering (%u, %u, %d, %u) with local "
 			"rpcbind\n", (port ? "" : "un"),
 			prog, vers, prot, port);
 
 	msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	return rpcb_register_call(sn->rpcb_local_clnt, &msg);
+	return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set);
 }
 
 /*
@@ -461,6 +477,7 @@
 	const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
 	struct rpcbind_args *map = msg->rpc_argp;
 	unsigned short port = ntohs(sin->sin_port);
+	bool is_set = false;
 	int result;
 
 	map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -471,10 +488,12 @@
 			map->r_addr, map->r_netid);
 
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
 	kfree(map->r_addr);
 	return result;
 }
@@ -489,6 +508,7 @@
 	const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
 	struct rpcbind_args *map = msg->rpc_argp;
 	unsigned short port = ntohs(sin6->sin6_port);
+	bool is_set = false;
 	int result;
 
 	map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -499,10 +519,12 @@
 			map->r_addr, map->r_netid);
 
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
-	if (port)
+	if (port != 0) {
 		msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+		is_set = true;
+	}
 
-	result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
 	kfree(map->r_addr);
 	return result;
 }
@@ -519,7 +541,7 @@
 	map->r_addr = "";
 	msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
 
-	return rpcb_register_call(sn->rpcb_local_clnt4, msg);
+	return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false);
 }
 
 /**
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 305374d..7762b9f 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1193,7 +1193,9 @@
 	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
 		return 1;
 	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
-	if (sk_stream_wspace(svsk->sk_sk) >= required)
+	if (sk_stream_wspace(svsk->sk_sk) >= required ||
+	    (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
+	     atomic_read(&xprt->xpt_reserved) == 0))
 		return 1;
 	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
 	return 0;
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index cb29ef7..609c30c 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -460,6 +460,7 @@
 {
 	struct tipc_link *l_ptr;
 	struct tipc_link *temp_l_ptr;
+	struct tipc_link_req *temp_req;
 
 	pr_info("Disabling bearer <%s>\n", b_ptr->name);
 	spin_lock_bh(&b_ptr->lock);
@@ -468,9 +469,13 @@
 	list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
 		tipc_link_delete(l_ptr);
 	}
-	if (b_ptr->link_req)
-		tipc_disc_delete(b_ptr->link_req);
+	temp_req = b_ptr->link_req;
+	b_ptr->link_req = NULL;
 	spin_unlock_bh(&b_ptr->lock);
+
+	if (temp_req)
+		tipc_disc_delete(temp_req);
+
 	memset(b_ptr, 0, sizeof(struct tipc_bearer));
 }
 
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 19da5abe..fd3fa57 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -355,8 +355,12 @@
 		return PTR_ERR(con);
 
 	sock = tipc_create_listen_sock(con);
-	if (!sock)
+	if (!sock) {
+		idr_remove(&s->conn_idr, con->conid);
+		s->idr_in_use--;
+		kfree(con);
 		return -EINVAL;
+	}
 
 	tipc_register_callbacks(sock, con);
 	return 0;
@@ -563,9 +567,14 @@
 		kmem_cache_destroy(s->rcvbuf_cache);
 		return ret;
 	}
+	ret = tipc_open_listening_sock(s);
+	if (ret < 0) {
+		tipc_work_stop(s);
+		kmem_cache_destroy(s->rcvbuf_cache);
+		return ret;
+	}
 	s->enabled = 1;
-
-	return tipc_open_listening_sock(s);
+	return ret;
 }
 
 void tipc_server_stop(struct tipc_server *s)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 593071d..4d93346 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -347,7 +347,7 @@
 	for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) {
 		struct vsock_sock *vsk;
 		list_for_each_entry(vsk, &vsock_connected_table[i],
-				    connected_table);
+				    connected_table)
 			fn(sk_vsock(vsk));
 	}
 
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 4f9f216..a8c29fa 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -765,6 +765,7 @@
 		cfg80211_leave_mesh(rdev, dev);
 		break;
 	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_P2P_GO:
 		cfg80211_stop_ap(rdev, dev);
 		break;
 	default:
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1cc47ac..5f6e982 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -441,10 +441,12 @@
 			goto out_unlock;
 		}
 		*rdev = wiphy_to_dev((*wdev)->wiphy);
-		cb->args[0] = (*rdev)->wiphy_idx;
+		/* 0 is the first index - add 1 to parse only once */
+		cb->args[0] = (*rdev)->wiphy_idx + 1;
 		cb->args[1] = (*wdev)->identifier;
 	} else {
-		struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]);
+		/* subtract the 1 again here */
+		struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
 		struct wireless_dev *tmp;
 
 		if (!wiphy) {
@@ -2620,8 +2622,8 @@
 
 	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_NEW_KEY);
-	if (IS_ERR(hdr))
-		return PTR_ERR(hdr);
+	if (!hdr)
+		return -ENOBUFS;
 
 	cookie.msg = msg;
 	cookie.idx = key_idx;
@@ -4770,9 +4772,9 @@
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, 0, 1,
 				  mask, NL80211_MESHCONF_FORWARDING,
 				  nla_get_u8);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, 1, 255,
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0,
 				  mask, NL80211_MESHCONF_RSSI_THRESHOLD,
-				  nla_get_u32);
+				  nla_get_s32);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16,
 				  mask, NL80211_MESHCONF_HT_OPMODE,
 				  nla_get_u16);
@@ -6505,6 +6507,9 @@
 					   NL80211_CMD_TESTMODE);
 		struct nlattr *tmdata;
 
+		if (!hdr)
+			break;
+
 		if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx)) {
 			genlmsg_cancel(skb, hdr);
 			break;
@@ -6613,12 +6618,14 @@
 
 void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
 {
+	struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
 	void *hdr = ((void **)skb->cb)[1];
 	struct nlattr *data = ((void **)skb->cb)[2];
 
 	nla_nest_end(skb, data);
 	genlmsg_end(skb, hdr);
-	genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp);
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,
+				nl80211_testmode_mcgrp.id, gfp);
 }
 EXPORT_SYMBOL(cfg80211_testmode_event);
 #endif
@@ -6947,9 +6954,8 @@
 
 	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_REMAIN_ON_CHANNEL);
-
-	if (IS_ERR(hdr)) {
-		err = PTR_ERR(hdr);
+	if (!hdr) {
+		err = -ENOBUFS;
 		goto free_msg;
 	}
 
@@ -7247,9 +7253,8 @@
 
 		hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 				     NL80211_CMD_FRAME);
-
-		if (IS_ERR(hdr)) {
-			err = PTR_ERR(hdr);
+		if (!hdr) {
+			err = -ENOBUFS;
 			goto free_msg;
 		}
 	}
@@ -8128,9 +8133,8 @@
 
 	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
 			     NL80211_CMD_PROBE_CLIENT);
-
-	if (IS_ERR(hdr)) {
-		err = PTR_ERR(hdr);
+	if (!hdr) {
+		err = -ENOBUFS;
 		goto free_msg;
 	}
 
@@ -10064,7 +10068,8 @@
 
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp);
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
 	return;
 
  nla_put_failure:
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 5a24c98..de06d5d 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2247,10 +2247,13 @@
 
 void wiphy_regulatory_register(struct wiphy *wiphy)
 {
+	struct regulatory_request *lr;
+
 	if (!reg_dev_ignore_cell_hint(wiphy))
 		reg_num_devs_support_basehint++;
 
-	wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE);
+	lr = get_last_request();
+	wiphy_update_regulatory(wiphy, lr->initiator);
 }
 
 void wiphy_regulatory_deregister(struct wiphy *wiphy)
@@ -2279,7 +2282,9 @@
 static void reg_timeout_work(struct work_struct *work)
 {
 	REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n");
+	rtnl_lock();
 	restore_regulatory_settings(true);
+	rtnl_unlock();
 }
 
 int __init regulatory_init(void)
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 1d3cfb1..20e86a9 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -34,8 +34,10 @@
 		CFG80211_CONN_SCAN_AGAIN,
 		CFG80211_CONN_AUTHENTICATE_NEXT,
 		CFG80211_CONN_AUTHENTICATING,
+		CFG80211_CONN_AUTH_FAILED,
 		CFG80211_CONN_ASSOCIATE_NEXT,
 		CFG80211_CONN_ASSOCIATING,
+		CFG80211_CONN_ASSOC_FAILED,
 		CFG80211_CONN_DEAUTH,
 		CFG80211_CONN_CONNECTED,
 	} state;
@@ -164,6 +166,8 @@
 					  NULL, 0,
 					  params->key, params->key_len,
 					  params->key_idx, NULL, 0);
+	case CFG80211_CONN_AUTH_FAILED:
+		return -ENOTCONN;
 	case CFG80211_CONN_ASSOCIATE_NEXT:
 		BUG_ON(!rdev->ops->assoc);
 		wdev->conn->state = CFG80211_CONN_ASSOCIATING;
@@ -188,10 +192,17 @@
 					     WLAN_REASON_DEAUTH_LEAVING,
 					     false);
 		return err;
+	case CFG80211_CONN_ASSOC_FAILED:
+		cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
+				     NULL, 0,
+				     WLAN_REASON_DEAUTH_LEAVING, false);
+		return -ENOTCONN;
 	case CFG80211_CONN_DEAUTH:
 		cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 				     NULL, 0,
 				     WLAN_REASON_DEAUTH_LEAVING, false);
+		/* free directly, disconnected event already sent */
+		cfg80211_sme_free(wdev);
 		return 0;
 	default:
 		return 0;
@@ -371,7 +382,7 @@
 		return true;
 	}
 
-	wdev->conn->state = CFG80211_CONN_DEAUTH;
+	wdev->conn->state = CFG80211_CONN_ASSOC_FAILED;
 	schedule_work(&rdev->conn_work);
 	return false;
 }
@@ -383,7 +394,13 @@
 
 void cfg80211_sme_auth_timeout(struct wireless_dev *wdev)
 {
-	cfg80211_sme_free(wdev);
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+
+	if (!wdev->conn)
+		return;
+
+	wdev->conn->state = CFG80211_CONN_AUTH_FAILED;
+	schedule_work(&rdev->conn_work);
 }
 
 void cfg80211_sme_disassoc(struct wireless_dev *wdev)
@@ -399,7 +416,13 @@
 
 void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev)
 {
-	cfg80211_sme_disassoc(wdev);
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+
+	if (!wdev->conn)
+		return;
+
+	wdev->conn->state = CFG80211_CONN_ASSOC_FAILED;
+	schedule_work(&rdev->conn_work);
 }
 
 static int cfg80211_sme_connect(struct wireless_dev *wdev,
@@ -953,21 +976,19 @@
 			struct net_device *dev, u16 reason, bool wextev)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	int err;
+	int err = 0;
 
 	ASSERT_WDEV_LOCK(wdev);
 
 	kfree(wdev->connect_keys);
 	wdev->connect_keys = NULL;
 
-	if (wdev->conn) {
+	if (wdev->conn)
 		err = cfg80211_sme_disconnect(wdev, reason);
-	} else if (!rdev->ops->disconnect) {
+	else if (!rdev->ops->disconnect)
 		cfg80211_mlme_down(rdev, dev);
-		err = 0;
-	} else {
+	else if (wdev->current_bss)
 		err = rdev_disconnect(rdev, dev, reason);
-	}
 
 	return err;
 }
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 3f7682a..eefbd10 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1998,12 +1998,11 @@
  *
  * Create or update the port list entry
  */
-static int smk_ipv6_port_check(struct sock *sk, struct sockaddr *address,
+static int smk_ipv6_port_check(struct sock *sk, struct sockaddr_in6 *address,
 				int act)
 {
 	__be16 *bep;
 	__be32 *be32p;
-	struct sockaddr_in6 *addr6;
 	struct smk_port_label *spp;
 	struct socket_smack *ssp = sk->sk_security;
 	struct smack_known *skp;
@@ -2025,10 +2024,9 @@
 	/*
 	 * Get the IP address and port from the address.
 	 */
-	addr6 = (struct sockaddr_in6 *)address;
-	port = ntohs(addr6->sin6_port);
-	bep = (__be16 *)(&addr6->sin6_addr);
-	be32p = (__be32 *)(&addr6->sin6_addr);
+	port = ntohs(address->sin6_port);
+	bep = (__be16 *)(&address->sin6_addr);
+	be32p = (__be32 *)(&address->sin6_addr);
 
 	/*
 	 * It's remote, so port lookup does no good.
@@ -2060,9 +2058,9 @@
 	ad.a.u.net->family = sk->sk_family;
 	ad.a.u.net->dport = port;
 	if (act == SMK_RECEIVING)
-		ad.a.u.net->v6info.saddr = addr6->sin6_addr;
+		ad.a.u.net->v6info.saddr = address->sin6_addr;
 	else
-		ad.a.u.net->v6info.daddr = addr6->sin6_addr;
+		ad.a.u.net->v6info.daddr = address->sin6_addr;
 #endif
 	return smk_access(skp, object, MAY_WRITE, &ad);
 }
@@ -2201,7 +2199,8 @@
 	case PF_INET6:
 		if (addrlen < sizeof(struct sockaddr_in6))
 			return -EINVAL;
-		rc = smk_ipv6_port_check(sock->sk, sap, SMK_CONNECTING);
+		rc = smk_ipv6_port_check(sock->sk, (struct sockaddr_in6 *)sap,
+						SMK_CONNECTING);
 		break;
 	}
 	return rc;
@@ -3034,7 +3033,7 @@
 				int size)
 {
 	struct sockaddr_in *sip = (struct sockaddr_in *) msg->msg_name;
-	struct sockaddr *sap = (struct sockaddr *) msg->msg_name;
+	struct sockaddr_in6 *sap = (struct sockaddr_in6 *) msg->msg_name;
 	int rc = 0;
 
 	/*
@@ -3121,9 +3120,8 @@
 	return smack_net_ambient;
 }
 
-static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr *sap)
+static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr_in6 *sip)
 {
-	struct sockaddr_in6 *sip = (struct sockaddr_in6 *)sap;
 	u8 nexthdr;
 	int offset;
 	int proto = -EINVAL;
@@ -3181,7 +3179,7 @@
 	struct netlbl_lsm_secattr secattr;
 	struct socket_smack *ssp = sk->sk_security;
 	struct smack_known *skp;
-	struct sockaddr sadd;
+	struct sockaddr_in6 sadd;
 	int rc = 0;
 	struct smk_audit_info ad;
 #ifdef CONFIG_AUDIT
diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c
index 99db892..9896954 100644
--- a/sound/core/compress_offload.c
+++ b/sound/core/compress_offload.c
@@ -743,7 +743,7 @@
 	mutex_lock(&stream->device->lock);
 	switch (_IOC_NR(cmd)) {
 	case _IOC_NR(SNDRV_COMPRESS_IOCTL_VERSION):
-		put_user(SNDRV_COMPRESS_VERSION,
+		retval = put_user(SNDRV_COMPRESS_VERSION,
 				(int __user *)arg) ? -EFAULT : 0;
 		break;
 	case _IOC_NR(SNDRV_COMPRESS_GET_CAPS):
diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c
index 7c11d46..48a9d00 100644
--- a/sound/pci/hda/hda_auto_parser.c
+++ b/sound/pci/hda/hda_auto_parser.c
@@ -860,7 +860,7 @@
 		}
 	}
 	if (id < 0 && quirk) {
-		for (q = quirk; q->subvendor; q++) {
+		for (q = quirk; q->subvendor || q->subdevice; q++) {
 			unsigned int vendorid =
 				q->subdevice | (q->subvendor << 16);
 			unsigned int mask = 0xffff0000 | q->subdevice_mask;
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 8e77cbb..e3c7ba8 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -522,7 +522,7 @@
 }
 
 #define nid_has_mute(codec, nid, dir) \
-	check_amp_caps(codec, nid, dir, AC_AMPCAP_MUTE)
+	check_amp_caps(codec, nid, dir, (AC_AMPCAP_MUTE | AC_AMPCAP_MIN_MUTE))
 #define nid_has_volume(codec, nid, dir) \
 	check_amp_caps(codec, nid, dir, AC_AMPCAP_NUM_STEPS)
 
@@ -624,7 +624,7 @@
 		if (enable)
 			val = (caps & AC_AMPCAP_OFFSET) >> AC_AMPCAP_OFFSET_SHIFT;
 	}
-	if (caps & AC_AMPCAP_MUTE) {
+	if (caps & (AC_AMPCAP_MUTE | AC_AMPCAP_MIN_MUTE)) {
 		if (!enable)
 			val |= HDA_AMP_MUTE;
 	}
@@ -648,7 +648,7 @@
 {
 	unsigned int mask = 0xff;
 
-	if (caps & AC_AMPCAP_MUTE) {
+	if (caps & (AC_AMPCAP_MUTE | AC_AMPCAP_MIN_MUTE)) {
 		if (is_ctl_associated(codec, nid, dir, idx, NID_PATH_MUTE_CTL))
 			mask &= ~0x80;
 	}
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 8860dd52..bf5e58e 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -555,6 +555,9 @@
 #ifdef CONFIG_SND_HDA_DSP_LOADER
 	struct azx_dev saved_azx_dev;
 #endif
+
+	/* secondary power domain for hdmi audio under vga device */
+	struct dev_pm_domain hdmi_pm_domain;
 };
 
 #define CREATE_TRACE_POINTS
@@ -1397,8 +1400,9 @@
 	int i, ok;
 
 #ifdef CONFIG_PM_RUNTIME
-	if (chip->pci->dev.power.runtime_status != RPM_ACTIVE)
-		return IRQ_NONE;
+	if (chip->driver_caps & AZX_DCAPS_PM_RUNTIME)
+		if (chip->pci->dev.power.runtime_status != RPM_ACTIVE)
+			return IRQ_NONE;
 #endif
 
 	spin_lock(&chip->reg_lock);
@@ -1409,7 +1413,7 @@
 	}
 
 	status = azx_readl(chip, INTSTS);
-	if (status == 0) {
+	if (status == 0 || status == 0xffffffff) {
 		spin_unlock(&chip->reg_lock);
 		return IRQ_NONE;
 	}
@@ -2971,6 +2975,12 @@
 	struct snd_card *card = dev_get_drvdata(dev);
 	struct azx *chip = card->private_data;
 
+	if (chip->disabled)
+		return 0;
+
+	if (!(chip->driver_caps & AZX_DCAPS_PM_RUNTIME))
+		return 0;
+
 	azx_stop_chip(chip);
 	azx_enter_link_reset(chip);
 	azx_clear_irq_pending(chip);
@@ -2984,6 +2994,12 @@
 	struct snd_card *card = dev_get_drvdata(dev);
 	struct azx *chip = card->private_data;
 
+	if (chip->disabled)
+		return 0;
+
+	if (!(chip->driver_caps & AZX_DCAPS_PM_RUNTIME))
+		return 0;
+
 	if (chip->driver_caps & AZX_DCAPS_I915_POWERWELL)
 		hda_display_power(true);
 	azx_init_pci(chip);
@@ -2996,6 +3012,9 @@
 	struct snd_card *card = dev_get_drvdata(dev);
 	struct azx *chip = card->private_data;
 
+	if (chip->disabled)
+		return 0;
+
 	if (!power_save_controller ||
 	    !(chip->driver_caps & AZX_DCAPS_PM_RUNTIME))
 		return -EBUSY;
@@ -3078,13 +3097,19 @@
 			   "%s: %s via VGA-switcheroo\n", pci_name(chip->pci),
 			   disabled ? "Disabling" : "Enabling");
 		if (disabled) {
+			pm_runtime_put_sync_suspend(&pci->dev);
 			azx_suspend(&pci->dev);
+			/* when we get suspended by vga switcheroo we end up in D3cold,
+			 * however we have no ACPI handle, so pci/acpi can't put us there,
+			 * put ourselves there */
+			pci->current_state = PCI_D3cold;
 			chip->disabled = true;
 			if (snd_hda_lock_devices(chip->bus))
 				snd_printk(KERN_WARNING SFX "%s: Cannot lock devices!\n",
 					   pci_name(chip->pci));
 		} else {
 			snd_hda_unlock_devices(chip->bus);
+			pm_runtime_get_noresume(&pci->dev);
 			chip->disabled = false;
 			azx_resume(&pci->dev);
 		}
@@ -3139,6 +3164,9 @@
 	if (err < 0)
 		return err;
 	chip->vga_switcheroo_registered = 1;
+
+	/* register as an optimus hdmi audio power domain */
+	vga_switcheroo_init_domain_pm_optimus_hdmi_audio(&chip->pci->dev, &chip->hdmi_pm_domain);
 	return 0;
 }
 #else
@@ -3887,7 +3915,7 @@
 	power_down_all_codecs(chip);
 	azx_notifier_register(chip);
 	azx_add_card_list(chip);
-	if (chip->driver_caps & AZX_DCAPS_PM_RUNTIME)
+	if ((chip->driver_caps & AZX_DCAPS_PM_RUNTIME) || chip->use_vga_switcheroo)
 		pm_runtime_put_noidle(&pci->dev);
 
 	return 0;
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 8bd2261..f303cd8 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -1031,6 +1031,7 @@
 	ALC880_FIXUP_GPIO2,
 	ALC880_FIXUP_MEDION_RIM,
 	ALC880_FIXUP_LG,
+	ALC880_FIXUP_LG_LW25,
 	ALC880_FIXUP_W810,
 	ALC880_FIXUP_EAPD_COEF,
 	ALC880_FIXUP_TCL_S700,
@@ -1089,6 +1090,14 @@
 			{ }
 		}
 	},
+	[ALC880_FIXUP_LG_LW25] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1a, 0x0181344f }, /* line-in */
+			{ 0x1b, 0x0321403f }, /* headphone */
+			{ }
+		}
+	},
 	[ALC880_FIXUP_W810] = {
 		.type = HDA_FIXUP_PINS,
 		.v.pins = (const struct hda_pintbl[]) {
@@ -1341,6 +1350,7 @@
 	SND_PCI_QUIRK(0x1854, 0x003b, "LG", ALC880_FIXUP_LG),
 	SND_PCI_QUIRK(0x1854, 0x005f, "LG P1 Express", ALC880_FIXUP_LG),
 	SND_PCI_QUIRK(0x1854, 0x0068, "LG w1", ALC880_FIXUP_LG),
+	SND_PCI_QUIRK(0x1854, 0x0077, "LG LW25", ALC880_FIXUP_LG_LW25),
 	SND_PCI_QUIRK(0x19db, 0x4188, "TCL S700", ALC880_FIXUP_TCL_S700),
 
 	/* Below is the copied entries from alc880_quirks.c.
@@ -4329,6 +4339,7 @@
 	SND_PCI_QUIRK(0x1025, 0x0308, "Acer Aspire 8942G", ALC662_FIXUP_ASPIRE),
 	SND_PCI_QUIRK(0x1025, 0x031c, "Gateway NV79", ALC662_FIXUP_SKU_IGNORE),
 	SND_PCI_QUIRK(0x1025, 0x0349, "eMachines eM250", ALC662_FIXUP_INV_DMIC),
+	SND_PCI_QUIRK(0x1025, 0x034a, "Gateway LT27", ALC662_FIXUP_INV_DMIC),
 	SND_PCI_QUIRK(0x1025, 0x038b, "Acer Aspire 8943G", ALC662_FIXUP_ASPIRE),
 	SND_PCI_QUIRK(0x1028, 0x05d8, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x05db, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
index e2f8359..6d1924c 100644
--- a/sound/pci/hda/patch_sigmatel.c
+++ b/sound/pci/hda/patch_sigmatel.c
@@ -417,9 +417,11 @@
 			val &= ~spec->eapd_mask;
 		else
 			val |= spec->eapd_mask;
-		if (spec->gpio_data != val)
+		if (spec->gpio_data != val) {
+			spec->gpio_data = val;
 			stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir,
 				      val);
+		}
 	}
 }
 
@@ -2817,6 +2819,7 @@
 
 /* codec SSIDs for Intel Mac sharing the same PCI SSID 8384:7680 */
 static const struct snd_pci_quirk stac922x_intel_mac_fixup_tbl[] = {
+	SND_PCI_QUIRK(0x0000, 0x0100, "Mac Mini", STAC_INTEL_MAC_V3),
 	SND_PCI_QUIRK(0x106b, 0x0800, "Mac", STAC_INTEL_MAC_V1),
 	SND_PCI_QUIRK(0x106b, 0x0600, "Mac", STAC_INTEL_MAC_V2),
 	SND_PCI_QUIRK(0x106b, 0x0700, "Mac", STAC_INTEL_MAC_V2),
@@ -3231,7 +3234,7 @@
 			/* configure the analog microphone on some laptops */
 			{ 0x0c, 0x90a79130 },
 			/* correct the front output jack as a hp out */
-			{ 0x0f, 0x0227011f },
+			{ 0x0f, 0x0221101f },
 			/* correct the front input jack as a mic */
 			{ 0x0e, 0x02a79130 },
 			{}
@@ -3612,20 +3615,18 @@
 static int stac_init(struct hda_codec *codec)
 {
 	struct sigmatel_spec *spec = codec->spec;
-	unsigned int gpio;
 	int i;
 
 	/* override some hints */
 	stac_store_hints(codec);
 
 	/* set up GPIO */
-	gpio = spec->gpio_data;
 	/* turn on EAPD statically when spec->eapd_switch isn't set.
 	 * otherwise, unsol event will turn it on/off dynamically
 	 */
 	if (!spec->eapd_switch)
-		gpio |= spec->eapd_mask;
-	stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, gpio);
+		spec->gpio_data |= spec->eapd_mask;
+	stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data);
 
 	snd_hda_gen_init(codec);
 
@@ -3915,6 +3916,7 @@
 {
 	struct sigmatel_spec *spec = codec->spec;
 
+	spec->gpio_mask |= spec->eapd_mask;
 	if (spec->gpio_led) {
 		if (!spec->vref_mute_led_nid) {
 			spec->gpio_mask |= spec->gpio_led;
diff --git a/sound/soc/au1x/ac97c.c b/sound/soc/au1x/ac97c.c
index d6f7694..c8a2de1 100644
--- a/sound/soc/au1x/ac97c.c
+++ b/sound/soc/au1x/ac97c.c
@@ -341,7 +341,7 @@
 	.remove		= au1xac97c_drvremove,
 };
 
-module_platform_driver(&au1xac97c_driver);
+module_platform_driver(au1xac97c_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Au1000/1500/1100 AC97C ASoC driver");
diff --git a/sound/soc/blackfin/bf5xx-ac97.c b/sound/soc/blackfin/bf5xx-ac97.c
index efb1dae..e82eb37 100644
--- a/sound/soc/blackfin/bf5xx-ac97.c
+++ b/sound/soc/blackfin/bf5xx-ac97.c
@@ -294,11 +294,12 @@
 	/* Request PB3 as reset pin */
 	ret = devm_gpio_request_one(&pdev->dev,
 				    CONFIG_SND_BF5XX_RESET_GPIO_NUM,
-				    GPIOF_OUT_INIT_HIGH, "SND_AD198x RESET") {
+				    GPIOF_OUT_INIT_HIGH, "SND_AD198x RESET");
+	if (ret) {
 		dev_err(&pdev->dev,
 			"Failed to request GPIO_%d for reset: %d\n",
 			CONFIG_SND_BF5XX_RESET_GPIO_NUM, ret);
-		goto gpio_err;
+		return ret;
 	}
 #endif
 
diff --git a/sound/soc/blackfin/bf5xx-ac97.h b/sound/soc/blackfin/bf5xx-ac97.h
index 15c635e..0c3e22d 100644
--- a/sound/soc/blackfin/bf5xx-ac97.h
+++ b/sound/soc/blackfin/bf5xx-ac97.h
@@ -9,7 +9,6 @@
 #ifndef _BF5XX_AC97_H
 #define _BF5XX_AC97_H
 
-extern struct snd_ac97_bus_ops bf5xx_ac97_ops;
 extern struct snd_ac97 *ac97;
 /* Frame format in memory, only support stereo currently */
 struct ac97_frame {
diff --git a/sound/soc/cirrus/ep93xx-ac97.c b/sound/soc/cirrus/ep93xx-ac97.c
index ac73c60..04491f0 100644
--- a/sound/soc/cirrus/ep93xx-ac97.c
+++ b/sound/soc/cirrus/ep93xx-ac97.c
@@ -102,13 +102,13 @@
 
 static struct ep93xx_dma_data ep93xx_ac97_pcm_out = {
 	.name		= "ac97-pcm-out",
-	.dma_port	= EP93XX_DMA_AAC1,
+	.port		= EP93XX_DMA_AAC1,
 	.direction	= DMA_MEM_TO_DEV,
 };
 
 static struct ep93xx_dma_data ep93xx_ac97_pcm_in = {
 	.name		= "ac97-pcm-in",
-	.dma_port	= EP93XX_DMA_AAC1,
+	.port		= EP93XX_DMA_AAC1,
 	.direction	= DMA_DEV_TO_MEM,
 };
 
diff --git a/sound/soc/codecs/cs42l52.c b/sound/soc/codecs/cs42l52.c
index 987f728..be2ba1b 100644
--- a/sound/soc/codecs/cs42l52.c
+++ b/sound/soc/codecs/cs42l52.c
@@ -195,6 +195,8 @@
 
 static DECLARE_TLV_DB_SCALE(mix_tlv, -50, 50, 0);
 
+static DECLARE_TLV_DB_SCALE(beep_tlv, -56, 200, 0);
+
 static const unsigned int limiter_tlv[] = {
 	TLV_DB_RANGE_HEAD(2),
 	0, 2, TLV_DB_SCALE_ITEM(-3000, 600, 0),
@@ -451,7 +453,8 @@
 	SOC_ENUM("Beep Pitch", beep_pitch_enum),
 	SOC_ENUM("Beep on Time", beep_ontime_enum),
 	SOC_ENUM("Beep off Time", beep_offtime_enum),
-	SOC_SINGLE_TLV("Beep Volume", CS42L52_BEEP_VOL, 0, 0x1f, 0x07, hl_tlv),
+	SOC_SINGLE_SX_TLV("Beep Volume", CS42L52_BEEP_VOL,
+			0, 0x07, 0x1f, beep_tlv),
 	SOC_SINGLE("Beep Mixer Switch", CS42L52_BEEP_TONE_CTL, 5, 1, 1),
 	SOC_ENUM("Beep Treble Corner Freq", beep_treble_enum),
 	SOC_ENUM("Beep Bass Corner Freq", beep_bass_enum),
diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c
index 3eeada5..566a367 100644
--- a/sound/soc/codecs/max98088.c
+++ b/sound/soc/codecs/max98088.c
@@ -1612,7 +1612,7 @@
 
 static void max98088_sync_cache(struct snd_soc_codec *codec)
 {
-       u16 *reg_cache = codec->reg_cache;
+       u8 *reg_cache = codec->reg_cache;
        int i;
 
        if (!codec->cache_sync)
diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c
index d659d3a..760e8bf 100644
--- a/sound/soc/codecs/sgtl5000.c
+++ b/sound/soc/codecs/sgtl5000.c
@@ -153,6 +153,8 @@
 static int power_vag_event(struct snd_soc_dapm_widget *w,
 	struct snd_kcontrol *kcontrol, int event)
 {
+	const u32 mask = SGTL5000_DAC_POWERUP | SGTL5000_ADC_POWERUP;
+
 	switch (event) {
 	case SND_SOC_DAPM_POST_PMU:
 		snd_soc_update_bits(w->codec, SGTL5000_CHIP_ANA_POWER,
@@ -160,9 +162,17 @@
 		break;
 
 	case SND_SOC_DAPM_PRE_PMD:
-		snd_soc_update_bits(w->codec, SGTL5000_CHIP_ANA_POWER,
-			SGTL5000_VAG_POWERUP, 0);
-		msleep(400);
+		/*
+		 * Don't clear VAG_POWERUP, when both DAC and ADC are
+		 * operational to prevent inadvertently starving the
+		 * other one of them.
+		 */
+		if ((snd_soc_read(w->codec, SGTL5000_CHIP_ANA_POWER) &
+				mask) != mask) {
+			snd_soc_update_bits(w->codec, SGTL5000_CHIP_ANA_POWER,
+				SGTL5000_VAG_POWERUP, 0);
+			msleep(400);
+		}
 		break;
 	default:
 		break;
@@ -388,7 +398,7 @@
 	SOC_DOUBLE("Capture Volume", SGTL5000_CHIP_ANA_ADC_CTRL, 0, 4, 0xf, 0),
 	SOC_SINGLE_TLV("Capture Attenuate Switch (-6dB)",
 			SGTL5000_CHIP_ANA_ADC_CTRL,
-			8, 2, 0, capture_6db_attenuate),
+			8, 1, 0, capture_6db_attenuate),
 	SOC_SINGLE("Capture ZC Switch", SGTL5000_CHIP_ANA_CTRL, 1, 1, 0),
 
 	SOC_DOUBLE_TLV("Headphone Playback Volume",
@@ -1527,6 +1537,9 @@
 	if (IS_ERR(sgtl5000->mclk)) {
 		ret = PTR_ERR(sgtl5000->mclk);
 		dev_err(&client->dev, "Failed to get mclock: %d\n", ret);
+		/* Defer the probe to see if the clk will be provided later */
+		if (ret == -ENOENT)
+			return -EPROBE_DEFER;
 		return ret;
 	}
 
diff --git a/sound/soc/codecs/wm0010.c b/sound/soc/codecs/wm0010.c
index f5e8356..10adc41 100644
--- a/sound/soc/codecs/wm0010.c
+++ b/sound/soc/codecs/wm0010.c
@@ -410,6 +410,16 @@
 			rec->command, rec->length);
 		len = rec->length + 8;
 
+		xfer = kzalloc(sizeof(*xfer), GFP_KERNEL);
+		if (!xfer) {
+			dev_err(codec->dev, "Failed to allocate xfer\n");
+			ret = -ENOMEM;
+			goto abort;
+		}
+
+		xfer->codec = codec;
+		list_add_tail(&xfer->list, &xfer_list);
+
 		out = kzalloc(len, GFP_KERNEL);
 		if (!out) {
 			dev_err(codec->dev,
@@ -417,6 +427,7 @@
 			ret = -ENOMEM;
 			goto abort1;
 		}
+		xfer->t.rx_buf = out;
 
 		img = kzalloc(len, GFP_KERNEL);
 		if (!img) {
@@ -425,24 +436,13 @@
 			ret = -ENOMEM;
 			goto abort1;
 		}
+		xfer->t.tx_buf = img;
 
 		byte_swap_64((u64 *)&rec->command, img, len);
 
-		xfer = kzalloc(sizeof(*xfer), GFP_KERNEL);
-		if (!xfer) {
-			dev_err(codec->dev, "Failed to allocate xfer\n");
-			ret = -ENOMEM;
-			goto abort1;
-		}
-
-		xfer->codec = codec;
-		list_add_tail(&xfer->list, &xfer_list);
-
 		spi_message_init(&xfer->m);
 		xfer->m.complete = wm0010_boot_xfer_complete;
 		xfer->m.context = xfer;
-		xfer->t.tx_buf = img;
-		xfer->t.rx_buf = out;
 		xfer->t.len = len;
 		xfer->t.bits_per_word = 8;
 
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 0ec070c..d82ee38 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -3908,10 +3908,8 @@
 {
 	/* create platform component name */
 	platform->name = fmt_single_name(dev, &platform->id);
-	if (platform->name == NULL) {
-		kfree(platform);
+	if (platform->name == NULL)
 		return -ENOMEM;
-	}
 
 	platform->dev = dev;
 	platform->driver = platform_drv;
diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c
index b941908..4375c9f 100644
--- a/sound/soc/soc-dapm.c
+++ b/sound/soc/soc-dapm.c
@@ -679,13 +679,14 @@
 		return -EINVAL;
 	}
 
-	path = list_first_entry(&w->sources, struct snd_soc_dapm_path,
-				list_sink);
-	if (!path) {
+	if (list_empty(&w->sources)) {
 		dev_err(dapm->dev, "ASoC: mux %s has no paths\n", w->name);
 		return -EINVAL;
 	}
 
+	path = list_first_entry(&w->sources, struct snd_soc_dapm_path,
+				list_sink);
+
 	ret = dapm_create_or_share_mixmux_kcontrol(w, 0, path);
 	if (ret < 0)
 		return ret;
@@ -2733,7 +2734,7 @@
 	}
 
 	mutex_unlock(&card->dapm_mutex);
-	return 0;
+	return change;
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_put_volsw);
 
@@ -2861,7 +2862,6 @@
 	struct soc_enum *e =
 		(struct soc_enum *)kcontrol->private_value;
 	int change;
-	int ret = 0;
 	int wi;
 
 	if (ucontrol->value.enumerated.item[0] >= e->max)
@@ -2881,7 +2881,7 @@
 	}
 
 	mutex_unlock(&card->dapm_mutex);
-	return ret;
+	return change;
 }
 EXPORT_SYMBOL_GPL(snd_soc_dapm_put_enum_virt);
 
diff --git a/sound/soc/tegra/tegra20_ac97.c b/sound/soc/tegra/tegra20_ac97.c
index e58233f..6c48662 100644
--- a/sound/soc/tegra/tegra20_ac97.c
+++ b/sound/soc/tegra/tegra20_ac97.c
@@ -389,9 +389,9 @@
 	ac97->capture_dma_data.slave_id = of_dma[1];
 
 	ac97->playback_dma_data.addr = mem->start + TEGRA20_AC97_FIFO_TX1;
-	ac97->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	ac97->capture_dma_data.maxburst = 4;
-	ac97->capture_dma_data.slave_id = of_dma[0];
+	ac97->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	ac97->playback_dma_data.maxburst = 4;
+	ac97->playback_dma_data.slave_id = of_dma[1];
 
 	ret = tegra_asoc_utils_init(&ac97->util_data, &pdev->dev);
 	if (ret)
diff --git a/sound/soc/tegra/tegra20_spdif.c b/sound/soc/tegra/tegra20_spdif.c
index 5eaa12c..551b3c9 100644
--- a/sound/soc/tegra/tegra20_spdif.c
+++ b/sound/soc/tegra/tegra20_spdif.c
@@ -323,8 +323,8 @@
 	}
 
 	spdif->playback_dma_data.addr = mem->start + TEGRA20_SPDIF_DATA_OUT;
-	spdif->capture_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
-	spdif->capture_dma_data.maxburst = 4;
+	spdif->playback_dma_data.addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	spdif->playback_dma_data.maxburst = 4;
 	spdif->playback_dma_data.slave_id = dmareq->start;
 
 	pm_runtime_enable(&pdev->dev);
diff --git a/sound/soc/tegra/tegra30_i2s.c b/sound/soc/tegra/tegra30_i2s.c
index d04146c..47565fd04 100644
--- a/sound/soc/tegra/tegra30_i2s.c
+++ b/sound/soc/tegra/tegra30_i2s.c
@@ -228,7 +228,7 @@
 		reg = TEGRA30_I2S_CIF_RX_CTRL;
 	} else {
 		val |= TEGRA30_AUDIOCIF_CTRL_DIRECTION_TX;
-		reg = TEGRA30_I2S_CIF_RX_CTRL;
+		reg = TEGRA30_I2S_CIF_TX_CTRL;
 	}
 
 	regmap_write(i2s->regmap, reg, val);
diff --git a/sound/usb/6fire/comm.c b/sound/usb/6fire/comm.c
index 9e6e3ff..23452ee 100644
--- a/sound/usb/6fire/comm.c
+++ b/sound/usb/6fire/comm.c
@@ -110,19 +110,37 @@
 static int usb6fire_comm_write8(struct comm_runtime *rt, u8 request,
 		u8 reg, u8 value)
 {
-	u8 buffer[13]; /* 13: maximum length of message */
+	u8 *buffer;
+	int ret;
+
+	/* 13: maximum length of message */
+	buffer = kmalloc(13, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
 
 	usb6fire_comm_init_buffer(buffer, 0x00, request, reg, value, 0x00);
-	return usb6fire_comm_send_buffer(buffer, rt->chip->dev);
+	ret = usb6fire_comm_send_buffer(buffer, rt->chip->dev);
+
+	kfree(buffer);
+	return ret;
 }
 
 static int usb6fire_comm_write16(struct comm_runtime *rt, u8 request,
 		u8 reg, u8 vl, u8 vh)
 {
-	u8 buffer[13]; /* 13: maximum length of message */
+	u8 *buffer;
+	int ret;
+
+	/* 13: maximum length of message */
+	buffer = kmalloc(13, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
 
 	usb6fire_comm_init_buffer(buffer, 0x00, request, reg, vl, vh);
-	return usb6fire_comm_send_buffer(buffer, rt->chip->dev);
+	ret = usb6fire_comm_send_buffer(buffer, rt->chip->dev);
+
+	kfree(buffer);
+	return ret;
 }
 
 int usb6fire_comm_init(struct sfire_chip *chip)
@@ -135,6 +153,12 @@
 	if (!rt)
 		return -ENOMEM;
 
+	rt->receiver_buffer = kzalloc(COMM_RECEIVER_BUFSIZE, GFP_KERNEL);
+	if (!rt->receiver_buffer) {
+		kfree(rt);
+		return -ENOMEM;
+	}
+
 	urb = &rt->receiver;
 	rt->serial = 1;
 	rt->chip = chip;
@@ -153,6 +177,7 @@
 	urb->interval = 1;
 	ret = usb_submit_urb(urb, GFP_KERNEL);
 	if (ret < 0) {
+		kfree(rt->receiver_buffer);
 		kfree(rt);
 		snd_printk(KERN_ERR PREFIX "cannot create comm data receiver.");
 		return ret;
@@ -171,6 +196,9 @@
 
 void usb6fire_comm_destroy(struct sfire_chip *chip)
 {
-	kfree(chip->comm);
+	struct comm_runtime *rt = chip->comm;
+
+	kfree(rt->receiver_buffer);
+	kfree(rt);
 	chip->comm = NULL;
 }
diff --git a/sound/usb/6fire/comm.h b/sound/usb/6fire/comm.h
index 6a0840b..780d5ed 100644
--- a/sound/usb/6fire/comm.h
+++ b/sound/usb/6fire/comm.h
@@ -24,7 +24,7 @@
 	struct sfire_chip *chip;
 
 	struct urb receiver;
-	u8 receiver_buffer[COMM_RECEIVER_BUFSIZE];
+	u8 *receiver_buffer;
 
 	u8 serial; /* urb serial */
 
diff --git a/sound/usb/6fire/midi.c b/sound/usb/6fire/midi.c
index 2672242..f3dd726 100644
--- a/sound/usb/6fire/midi.c
+++ b/sound/usb/6fire/midi.c
@@ -19,6 +19,10 @@
 #include "chip.h"
 #include "comm.h"
 
+enum {
+	MIDI_BUFSIZE = 64
+};
+
 static void usb6fire_midi_out_handler(struct urb *urb)
 {
 	struct midi_runtime *rt = urb->context;
@@ -156,6 +160,12 @@
 	if (!rt)
 		return -ENOMEM;
 
+	rt->out_buffer = kzalloc(MIDI_BUFSIZE, GFP_KERNEL);
+	if (!rt->out_buffer) {
+		kfree(rt);
+		return -ENOMEM;
+	}
+
 	rt->chip = chip;
 	rt->in_received = usb6fire_midi_in_received;
 	rt->out_buffer[0] = 0x80; /* 'send midi' command */
@@ -169,6 +179,7 @@
 
 	ret = snd_rawmidi_new(chip->card, "6FireUSB", 0, 1, 1, &rt->instance);
 	if (ret < 0) {
+		kfree(rt->out_buffer);
 		kfree(rt);
 		snd_printk(KERN_ERR PREFIX "unable to create midi.\n");
 		return ret;
@@ -197,6 +208,9 @@
 
 void usb6fire_midi_destroy(struct sfire_chip *chip)
 {
-	kfree(chip->midi);
+	struct midi_runtime *rt = chip->midi;
+
+	kfree(rt->out_buffer);
+	kfree(rt);
 	chip->midi = NULL;
 }
diff --git a/sound/usb/6fire/midi.h b/sound/usb/6fire/midi.h
index c321006..84851b9 100644
--- a/sound/usb/6fire/midi.h
+++ b/sound/usb/6fire/midi.h
@@ -16,10 +16,6 @@
 
 #include "common.h"
 
-enum {
-	MIDI_BUFSIZE = 64
-};
-
 struct midi_runtime {
 	struct sfire_chip *chip;
 	struct snd_rawmidi *instance;
@@ -32,7 +28,7 @@
 	struct snd_rawmidi_substream *out;
 	struct urb out_urb;
 	u8 out_serial; /* serial number of out packet */
-	u8 out_buffer[MIDI_BUFSIZE];
+	u8 *out_buffer;
 	int buffer_offset;
 
 	void (*in_received)(struct midi_runtime *rt, u8 *data, int length);
diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c
index 2aa4e13..b5eb97f 100644
--- a/sound/usb/6fire/pcm.c
+++ b/sound/usb/6fire/pcm.c
@@ -543,7 +543,7 @@
 	snd_pcm_uframes_t ret;
 
 	if (rt->panic || !sub)
-		return SNDRV_PCM_STATE_XRUN;
+		return SNDRV_PCM_POS_XRUN;
 
 	spin_lock_irqsave(&sub->lock, flags);
 	ret = sub->dma_off;
@@ -582,6 +582,33 @@
 	urb->instance.number_of_packets = PCM_N_PACKETS_PER_URB;
 }
 
+static int usb6fire_pcm_buffers_init(struct pcm_runtime *rt)
+{
+	int i;
+
+	for (i = 0; i < PCM_N_URBS; i++) {
+		rt->out_urbs[i].buffer = kzalloc(PCM_N_PACKETS_PER_URB
+				* PCM_MAX_PACKET_SIZE, GFP_KERNEL);
+		if (!rt->out_urbs[i].buffer)
+			return -ENOMEM;
+		rt->in_urbs[i].buffer = kzalloc(PCM_N_PACKETS_PER_URB
+				* PCM_MAX_PACKET_SIZE, GFP_KERNEL);
+		if (!rt->in_urbs[i].buffer)
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+static void usb6fire_pcm_buffers_destroy(struct pcm_runtime *rt)
+{
+	int i;
+
+	for (i = 0; i < PCM_N_URBS; i++) {
+		kfree(rt->out_urbs[i].buffer);
+		kfree(rt->in_urbs[i].buffer);
+	}
+}
+
 int usb6fire_pcm_init(struct sfire_chip *chip)
 {
 	int i;
@@ -593,6 +620,13 @@
 	if (!rt)
 		return -ENOMEM;
 
+	ret = usb6fire_pcm_buffers_init(rt);
+	if (ret) {
+		usb6fire_pcm_buffers_destroy(rt);
+		kfree(rt);
+		return ret;
+	}
+
 	rt->chip = chip;
 	rt->stream_state = STREAM_DISABLED;
 	rt->rate = ARRAY_SIZE(rates);
@@ -614,6 +648,7 @@
 
 	ret = snd_pcm_new(chip->card, "DMX6FireUSB", 0, 1, 1, &pcm);
 	if (ret < 0) {
+		usb6fire_pcm_buffers_destroy(rt);
 		kfree(rt);
 		snd_printk(KERN_ERR PREFIX "cannot create pcm instance.\n");
 		return ret;
@@ -625,6 +660,7 @@
 	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &pcm_ops);
 
 	if (ret) {
+		usb6fire_pcm_buffers_destroy(rt);
 		kfree(rt);
 		snd_printk(KERN_ERR PREFIX
 				"error preallocating pcm buffers.\n");
@@ -669,6 +705,9 @@
 
 void usb6fire_pcm_destroy(struct sfire_chip *chip)
 {
-	kfree(chip->pcm);
+	struct pcm_runtime *rt = chip->pcm;
+
+	usb6fire_pcm_buffers_destroy(rt);
+	kfree(rt);
 	chip->pcm = NULL;
 }
diff --git a/sound/usb/6fire/pcm.h b/sound/usb/6fire/pcm.h
index 9b01133..f5779d6 100644
--- a/sound/usb/6fire/pcm.h
+++ b/sound/usb/6fire/pcm.h
@@ -32,7 +32,7 @@
 	struct urb instance;
 	struct usb_iso_packet_descriptor packets[PCM_N_PACKETS_PER_URB];
 	/* END DO NOT SEPARATE */
-	u8 buffer[PCM_N_PACKETS_PER_URB * PCM_MAX_PACKET_SIZE];
+	u8 *buffer;
 
 	struct pcm_urb *peer;
 };
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 7a444b5..659950e 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -591,17 +591,16 @@
 	ep->stride = frame_bits >> 3;
 	ep->silence_value = pcm_format == SNDRV_PCM_FORMAT_U8 ? 0x80 : 0;
 
-	/* calculate max. frequency */
-	if (ep->maxpacksize) {
+	/* assume max. frequency is 25% higher than nominal */
+	ep->freqmax = ep->freqn + (ep->freqn >> 2);
+	maxsize = ((ep->freqmax + 0xffff) * (frame_bits >> 3))
+				>> (16 - ep->datainterval);
+	/* but wMaxPacketSize might reduce this */
+	if (ep->maxpacksize && ep->maxpacksize < maxsize) {
 		/* whatever fits into a max. size packet */
 		maxsize = ep->maxpacksize;
 		ep->freqmax = (maxsize / (frame_bits >> 3))
 				<< (16 - ep->datainterval);
-	} else {
-		/* no max. packet size: just take 25% higher than nominal */
-		ep->freqmax = ep->freqn + (ep->freqn >> 2);
-		maxsize = ((ep->freqmax + 0xffff) * (frame_bits >> 3))
-				>> (16 - ep->datainterval);
 	}
 
 	if (ep->fill_max)
diff --git a/sound/usb/hiface/pcm.c b/sound/usb/hiface/pcm.c
index 6430ed2..c21a3df 100644
--- a/sound/usb/hiface/pcm.c
+++ b/sound/usb/hiface/pcm.c
@@ -503,7 +503,7 @@
 	snd_pcm_uframes_t dma_offset;
 
 	if (rt->panic || !sub)
-		return SNDRV_PCM_STATE_XRUN;
+		return SNDRV_PCM_POS_XRUN;
 
 	spin_lock_irqsave(&sub->lock, flags);
 	dma_offset = sub->dma_off;
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index d543808..95558ef 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -888,6 +888,7 @@
 	case USB_ID(0x046d, 0x081b): /* HD Webcam c310 */
 	case USB_ID(0x046d, 0x081d): /* HD Webcam c510 */
 	case USB_ID(0x046d, 0x0825): /* HD Webcam c270 */
+	case USB_ID(0x046d, 0x0826): /* HD Webcam c525 */
 	case USB_ID(0x046d, 0x0991):
 	/* Most audio usb devices lie about volume resolution.
 	 * Most Logitech webcams have res = 384.
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 1bc45e7..0df9ede 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -319,19 +319,19 @@
 	if (altsd->bNumEndpoints < 1)
 		return -ENODEV;
 	epd = get_endpoint(alts, 0);
-	if (!usb_endpoint_xfer_bulk(epd) ||
+	if (!usb_endpoint_xfer_bulk(epd) &&
 	    !usb_endpoint_xfer_int(epd))
 		return -ENODEV;
 
 	switch (USB_ID_VENDOR(chip->usb_id)) {
 	case 0x0499: /* Yamaha */
 		err = create_yamaha_midi_quirk(chip, iface, driver, alts);
-		if (err < 0 && err != -ENODEV)
+		if (err != -ENODEV)
 			return err;
 		break;
 	case 0x0582: /* Roland */
 		err = create_roland_midi_quirk(chip, iface, driver, alts);
-		if (err < 0 && err != -ENODEV)
+		if (err != -ENODEV)
 			return err;
 		break;
 	}
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index ca9fa4d..07819bf 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -1026,9 +1026,10 @@
 
 				if (sn_offset == 0)
 					strcpy(sn_str, cidr_mask);
-				else
+				else {
+					strcat((char *)ip_buffer->sub_net, ";");
 					strcat(sn_str, cidr_mask);
-				strcat((char *)ip_buffer->sub_net, ";");
+				}
 				sn_offset += strlen(sn_str) + 1;
 			}